Kevin Hu
committed on
Commit
·
3ec35b2
1
Parent(s):
e195b4d
force eml file to be parsed by EMAIL (#2615)
Browse files

### What problem does this PR solve?
#2613
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- api/apps/dataset_api.py +2 -0
- api/apps/document_app.py +2 -0
api/apps/dataset_api.py
CHANGED
|
@@ -381,6 +381,8 @@ def upload_documents(dataset_id):
|
|
| 381 |
doc["parser_id"] = ParserType.AUDIO.value
|
| 382 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 383 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
|
|
|
|
|
|
| 384 |
DocumentService.insert(doc)
|
| 385 |
|
| 386 |
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
|
|
|
|
| 381 |
doc["parser_id"] = ParserType.AUDIO.value
|
| 382 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 383 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
| 384 |
+
if re.search(r"\.(eml)$", filename):
|
| 385 |
+
doc["parser_id"] = ParserType.EMAIL.value
|
| 386 |
DocumentService.insert(doc)
|
| 387 |
|
| 388 |
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
|
api/apps/document_app.py
CHANGED
|
@@ -139,6 +139,8 @@ def web_crawl():
|
|
| 139 |
doc["parser_id"] = ParserType.AUDIO.value
|
| 140 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 141 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
|
|
|
|
|
|
| 142 |
DocumentService.insert(doc)
|
| 143 |
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
| 144 |
except Exception as e:
|
|
|
|
| 139 |
doc["parser_id"] = ParserType.AUDIO.value
|
| 140 |
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
| 141 |
doc["parser_id"] = ParserType.PRESENTATION.value
|
| 142 |
+
if re.search(r"\.(eml)$", filename):
|
| 143 |
+
doc["parser_id"] = ParserType.EMAIL.value
|
| 144 |
DocumentService.insert(doc)
|
| 145 |
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
| 146 |
except Exception as e:
|