Spaces:

leonarb
/

olmocr-demo

Running

File size: 2,355 Bytes

5827499
d5f7d0d
af75cff
d45f3e7
5827499
d5f7d0d
8be5494
5827499
d5f7d0d
 
 
5827499
d5f7d0d
fff0f58
5827499
d5f7d0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5827499
d5f7d0d
 
 
 
 
 
5827499
 
 
d5f7d0d
 
 
5827499
fff0f58
 
d5f7d0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d45f3e7
 
d5f7d0d

import html
import tempfile
import uuid
from io import BytesIO

import gradio as gr
from ebooklib import epub
from PIL import Image

from olmocr.model import process_pdf  # your forked olmocr model

def process_pdf_to_epub(pdf_file, title, author):
    """Convert an uploaded PDF into an EPUB with one chapter per non-empty page.

    Args:
        pdf_file: Upload from the Gradio ``File`` input; either a path string
            or an object exposing the path as ``.name``.
        title: EPUB title. A falsy value falls back to "Untitled Document".
        author: Author name. Skipped when falsy.

    Returns:
        Path (``str``) of the generated ``.epub`` file on disk, which is what
        a Gradio ``File`` output component can serve for download.

    Raises:
        gr.Error: If no PDF was uploaded.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file first.")
    # Accept both a plain file path and a tempfile-like wrapper with `.name`.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Run the OCR + LLM pipeline from olmocr
    print("Starting PDF processing...")
    page_results = process_pdf(pdf_path)

    # Create the EPUB book
    book = epub.EpubBook()
    # A fresh identifier per book so readers do not treat every output as the
    # same publication.
    book.set_identifier(f"urn:uuid:{uuid.uuid4()}")
    book.set_title(title if title else "Untitled Document")
    book.set_language("en")
    if author:
        book.add_author(author)

    # Best-effort cover: encode in memory instead of going through a shared
    # "cover.jpg" in the working directory, which concurrent requests would
    # overwrite and which was never cleaned up.
    # NOTE(review): Pillow may not be able to decode PDF input at all; when it
    # cannot, the failure is logged and the book is built without a cover.
    try:
        with Image.open(pdf_path) as img:
            cover_buffer = BytesIO()
            img.convert("RGB").save(cover_buffer, "JPEG")
        book.set_cover("cover.jpg", cover_buffer.getvalue())
    except Exception as e:
        print("Could not generate cover:", e)

    # Add chapters from pages; page numbers follow the position in
    # page_results, so skipped (empty) pages leave gaps in the numbering.
    chapters = []
    for page_number, page in enumerate(page_results, start=1):
        # presumably each page is a dict shaped like
        # {"decoded": {"natural_text": ...}}; tolerate missing or null values.
        text = (page.get("decoded") or {}).get("natural_text") or ""
        if not text.strip():
            continue
        # Escape before inserting markup so characters such as "<" and "&" in
        # the OCR text cannot break the XHTML. Built outside the f-string
        # because a backslash inside an f-string expression is a syntax error
        # before Python 3.12.
        body = html.escape(text).replace("\n", "<br/>")
        chapter = epub.EpubHtml(
            title=f"Page {page_number}",
            file_name=f"page_{page_number}.xhtml",
            lang="en",
        )
        chapter.content = f"<h1>Page {page_number}</h1><p>{body}</p>"
        book.add_item(chapter)
        chapters.append(chapter)

    # Define spine and table of contents
    book.toc = chapters
    book.spine = ["nav"] + chapters
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Write to a real file: the Gradio File output serves a path, not an
    # in-memory buffer. delete=False keeps the file after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".epub", delete=False) as tmp:
        output_path = tmp.name
    epub.write_epub(output_path, book)
    return output_path

# Gradio UI: upload a PDF, optionally supply title/author, download the EPUB.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to EPUB Converter\nPowered by `olmOCR`")

    # The upload widget sits in its own row; the other controls stack below it.
    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF")

    title = gr.Textbox(placeholder="Optional title", label="EPUB Title")
    author = gr.Textbox(placeholder="Optional author name", label="Author")
    convert_button = gr.Button("Convert to EPUB")
    epub_output = gr.File(file_types=[".epub"], label="Download EPUB")

    # Clicking the button feeds (pdf, title, author) to the converter and
    # routes its result to the download component.
    conversion_inputs = [pdf_input, title, author]
    convert_button.click(
        fn=process_pdf_to_epub,
        inputs=conversion_inputs,
        outputs=epub_output,
    )

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()