leonarb committed on
Commit
fff0f58
·
verified ·
1 Parent(s): a67d3a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
8
  from PIL import Image
9
  from PyPDF2 import PdfReader
10
  from ebooklib import epub
 
11
  from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
12
 
13
  from olmocr.data.renderpdf import render_pdf_to_base64png
@@ -59,6 +60,34 @@ def ocr_page(pdf_path, page_num):
59
  decoded = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
60
  return decoded[0] if decoded else ""
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  def convert_pdf_to_epub(pdf_file, title, author, language):
63
  # Save the uploaded file to a temporary path
64
  tmp_pdf_path = "/tmp/uploaded.pdf"
 
8
  from PIL import Image
9
  from PyPDF2 import PdfReader
10
  from ebooklib import epub
11
+ from pdf2image import convert_from_path
12
  from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
13
 
14
  from olmocr.data.renderpdf import render_pdf_to_base64png
 
60
  decoded = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
61
  return decoded[0] if decoded else ""
62
 
63
def create_epub_from_text(text, output_path, title, author, language, cover_image):
    """Build a single-chapter EPUB from plain text and write it to disk.

    Args:
        text: Plain text for the book body. Blank lines separate paragraphs.
        output_path: Destination path of the EPUB file.
        title: Book title; stored in the metadata and used as the chapter heading.
        author: Author name stored in the metadata.
        language: Language code stored in the metadata and on the chapter.
        cover_image: Path to a JPEG cover image. A falsy value (``None`` or
            ``""``) produces a book without a cover.

    Raises:
        OSError: If ``cover_image`` is given but cannot be read.
    """
    # Function-scope imports keep this block self-contained.
    import re
    from html import escape

    book = epub.EpubBook()

    # Set metadata
    book.set_title(title)
    book.set_language(language)
    book.add_author(author)

    # The spine defines the reading order; without it readers show no content.
    spine = ["nav"]

    # Add cover image (optional)
    if cover_image:
        with open(cover_image, "rb") as cover_file:
            cover_data = cover_file.read()
        # set_cover() registers the image as the book cover in the package
        # metadata and creates a cover page with the uid "cover".
        book.set_cover("cover.jpg", cover_data)
        spine.insert(0, "cover")

    # Escape the text before embedding it: raw "<" or "&" (common in OCR
    # output) would otherwise yield malformed XHTML.
    paragraphs = [
        f"<p>{escape(block.strip())}</p>"
        for block in re.split(r"\n\s*\n", text or "")
        if block.strip()
    ]
    body = "".join(paragraphs) or "<p></p>"
    heading = escape(title or "")

    # Create a chapter for the content
    chapter = epub.EpubHtml(title="Content", file_name="content.xhtml", lang=language)
    chapter.set_content(f"<html><body><h1>{heading}</h1>{body}</body></html>")
    book.add_item(chapter)
    spine.append(chapter)

    # Define Table of Contents (TOC)
    book.toc = (epub.Link("content.xhtml", "Content", "content"),)

    # Add the navigation documents: NCX for EPUB 2 readers, Nav for EPUB 3.
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    book.spine = spine

    # Write the EPUB file
    epub.write_epub(output_path, book)
90
+
91
  def convert_pdf_to_epub(pdf_file, title, author, language):
92
  # Save the uploaded file to a temporary path
93
  tmp_pdf_path = "/tmp/uploaded.pdf"