Add support for downloading PowerPoint PPT files
Browse files
- api/docs.py +5 -0
api/docs.py
CHANGED
@@ -229,6 +229,11 @@ async def doc_to_txt(doc_id: str, url: str, client: AsyncClient) -> str:
|
|
229 |
applied_revision = apply_docx_revisions(zipfile.ZipFile(bytes))
|
230 |
extracted_data = await extract_bytes(applied_revision.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
|
231 |
final_text = extracted_data.content
|
|
|
|
|
|
|
|
|
|
|
232 |
else:
|
233 |
if ext in FORMAT_MIME_TYPES: # file extension is supported
|
234 |
extracted_data = await extract_bytes(bytes.read(), FORMAT_MIME_TYPES[ext], config=KREUZBERG_CONFIG)
|
|
|
229 |
applied_revision = apply_docx_revisions(zipfile.ZipFile(bytes))
|
230 |
extracted_data = await extract_bytes(applied_revision.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
|
231 |
final_text = extracted_data.content
|
232 |
+
elif ext == ".ppt":
|
233 |
+
logging.debug(f"Converting {filename} .ppt --> .pptx")
|
234 |
+
docx_bytes = await convert_file(bytes, doc_id, "ppt", "pptx")
|
235 |
+
extracted_data = await extract_bytes(docx_bytes.read(), FORMAT_MIME_TYPES[".pptx"], config=KREUZBERG_CONFIG)
|
236 |
+
final_text = extracted_data.content
|
237 |
else:
|
238 |
if ext in FORMAT_MIME_TYPES: # file extension is supported
|
239 |
extracted_data = await extract_bytes(bytes.read(), FORMAT_MIME_TYPES[ext], config=KREUZBERG_CONFIG)
|