Game4all committed on
Commit
de87f05
·
verified ·
1 Parent(s): 9f79248

Add support for downloading PowerPoint PPT files

Browse files
Files changed (1) hide show
  1. api/docs.py +5 -0
api/docs.py CHANGED
@@ -229,6 +229,11 @@ async def doc_to_txt(doc_id: str, url: str, client: AsyncClient) -> str:
229
  applied_revision = apply_docx_revisions(zipfile.ZipFile(bytes))
230
  extracted_data = await extract_bytes(applied_revision.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
231
  final_text = extracted_data.content
 
 
 
 
 
232
  else:
233
  if ext in FORMAT_MIME_TYPES: # file extension is supported
234
  extracted_data = await extract_bytes(bytes.read(), FORMAT_MIME_TYPES[ext], config=KREUZBERG_CONFIG)
 
229
  applied_revision = apply_docx_revisions(zipfile.ZipFile(bytes))
230
  extracted_data = await extract_bytes(applied_revision.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
231
  final_text = extracted_data.content
232
+ elif ext == ".ppt":
233
+ logging.debug(f"Converting {filename} .ppt --> .pptx")
234
+ docx_bytes = await convert_file(bytes, doc_id, "ppt", "pptx")
235
+ extracted_data = await extract_bytes(docx_bytes.read(), FORMAT_MIME_TYPES[".pptx"], config=KREUZBERG_CONFIG)
236
+ final_text = extracted_data.content
237
  else:
238
  if ext in FORMAT_MIME_TYPES: # file extension is supported
239
  extracted_data = await extract_bytes(bytes.read(), FORMAT_MIME_TYPES[ext], config=KREUZBERG_CONFIG)