Spaces:

awacke1
/

GradioFolderTree

Build error

App Files Files Community

awacke1 committed 3 days ago

Commit

5fe90b3

verified ·

1 Parent(s): f367c7b

Update app.py

Browse files

Files changed (1) hide show

app.py +217 -183

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import io
 import base64
 from collections import defaultdict
 from PIL import Image
 # Document Generation Libs
 from docx import Document
@@ -20,17 +21,20 @@ from reportlab.lib.units import inch
 from reportlab.pdfbase import pdfmetrics
 from reportlab.pdfbase.ttfonts import TTFont
-# AI and Media Libs
-from openai import AzureOpenAI
 import fitz  # PyMuPDF
 # --- Configuration & Setup ---
 CWD = Path.cwd()
 OUTPUT_DIR = CWD / "generated_outputs"
 PREVIEW_DIR = CWD / "previews"
 FONT_DIR = CWD
 OUTPUT_DIR.mkdir(exist_ok=True)
 PREVIEW_DIR.mkdir(exist_ok=True)
 LAYOUTS = {
     "A4 Portrait": {"size": A4},
@@ -39,40 +43,6 @@ LAYOUTS = {
     "Letter Landscape": {"size": landscape(letter)},
 }
-# 🧠 Initialize Azure OpenAI Client
-# NOTE: This requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY in your environment.
-try:
-    client = AzureOpenAI(
-        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
-        api_version="2024-05-01-preview",
-        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
-    )
-    AZURE_CLIENT_AVAILABLE = True
-except Exception as e:
-    print("Warning: Azure OpenAI client could not be initialized. Text generation will use dummy data.")
-    print(f"Error: {e}")
-    client = None
-    AZURE_CLIENT_AVAILABLE = False
-# 📖 Map UI model names to your actual Azure deployment names.
-# YOU MUST CHANGE THESE DEPLOYMENT NAMES to match your Azure setup.
-AZURE_DEPLOYMENT_NAMES = {
-    # Chat / Vision Models
-    "gpt-4o": "your-gpt-4o-deployment-name",
-    "gpt-4.1": "your-gpt-4.1-deployment-name",
-    "gpt-4.1-mini": "your-gpt-4.1-mini-deployment-name",
-    "gpt-4o-mini": "your-gpt-4o-mini-deployment-name",
-    "gpt-4o-realtime-preview": "your-gpt-4o-realtime-deployment-name",
-    # Reasoning Models
-    "o1-mini": "your-o1-mini-deployment-name",
-    "o3-mini": "your-o3-mini-deployment-name",
-    "o4-mini": "your-o4-mini-deployment-name",
-    # Transcription Models
-    "gpt-4o-transcribe": "your-gpt-4o-transcribe-deployment",
-    "gpt-4o-mini-transcribe": "your-gpt-4o-mini-transcribe-deployment",
-}
 # --- ✍️ Document Generation Engines ---
 def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
@@ -125,94 +95,61 @@ def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
     """📜 Translates Markdown text into a sequence of ReportLab flowables for PDF rendering."""
     styles = getSampleStyleSheet()
     bold_font = f"{font_name}-Bold" if font_name != "Helvetica" else "Helvetica-Bold"
-    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10)
-    style_h1 = ParagraphStyle('h1', fontName=bold_font, spaceBefore=12, fontSize=24)
     story, first_heading = [], True
     for line in markdown_text.split('\n'):
-        content, style = line, style_normal
-        if line.startswith("# "):
             if not first_heading: story.append(PageBreak())
-            content, style, first_heading = line.lstrip('# '), style_h1, False
-        formatted_content = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content)
         final_content = apply_emoji_font(formatted_content, emoji_font)
-        story.append(Paragraph(final_content, style))
     return story
-# --- 🔮 Omni-Model Processing ---
-def process_text_input(prompt, model_deployment_name):
-    """💬 Sends a text prompt to the Azure OpenAI model and gets a response."""
-    if not AZURE_CLIENT_AVAILABLE: return "Azure OpenAI client not configured. This is dummy text."
-    completion = client.chat.completions.create(
-        model=model_deployment_name,
-        messages=[{"role": "user", "content": prompt}]
-    )
-    return completion.choices[0].message.content
-def process_image_input(image_file, prompt, model_deployment_name):
-    """🖼️ Encodes an image and sends it with a prompt to the Azure OpenAI model."""
-    if not AZURE_CLIENT_AVAILABLE: return "Azure OpenAI client not configured. This is a dummy image description."
-    with Image.open(image_file.name) as img:
-        with io.BytesIO() as output:
-            img.save(output, format="PNG")
-            base64_image = base64.b64encode(output.getvalue()).decode("utf-8")
-    response = client.chat.completions.create(
-        model=model_deployment_name,
-        messages=[{"role": "user", "content": [
-            {"type": "text", "text": prompt},
-            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
-        ]}]
-    )
-    return response.choices[0].message.content
-def process_audio_input(audio_file, prompt, chat_model_deployment, transcribe_model_deployment):
-    """🎤 Transcribes audio and sends the text with a prompt to the Azure OpenAI model."""
-    if not AZURE_CLIENT_AVAILABLE: return "Azure OpenAI client not configured. This is a dummy audio summary."
-    with open(audio_file.name, "rb") as f:
-        transcription = client.audio.transcriptions.create(
-            model=transcribe_model_deployment,
-            file=f
-        ).text
-    full_prompt = f"{prompt}\n\nAudio Transcription:\n{transcription}"
-    return process_text_input(full_prompt, chat_model_deployment)
-def process_pdf_input(pdf_file, prompt, model_deployment_name, progress):
-    """📄 Performs OCR on a PDF by sending pages as images to the AI model."""
-    if not AZURE_CLIENT_AVAILABLE: return "Azure OpenAI client not configured. This is a dummy PDF summary."
-    all_extracted_text = []
-    doc = fitz.open(pdf_file.name)
-    # Process pages in pairs
-    for i in progress.tqdm(range(0, len(doc), 2), desc="Performing PDF OCR"):
-        page_images = []
-        messages = [{"type": "text", "text": prompt}]
-        # Get first page of the pair
-        page1 = doc.load_page(i)
-        pix1 = page1.get_pixmap(dpi=150)
-        img_bytes1 = pix1.tobytes("png")
-        base64_image1 = base64.b64encode(img_bytes1).decode("utf-8")
-        messages.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image1}"}})
-        # Get second page if it exists
-        if i + 1 < len(doc):
-            page2 = doc.load_page(i + 1)
-            pix2 = page2.get_pixmap(dpi=150)
-            img_bytes2 = pix2.tobytes("png")
-            base64_image2 = base64.b64encode(img_bytes2).decode("utf-8")
-            messages.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image2}"}})
-        response = client.chat.completions.create(
-            model=model_deployment_name,
-            messages=[{"role": "user", "content": messages}]
-        )
-        all_extracted_text.append(response.choices[0].message.content)
-    return "\n\n".join(all_extracted_text)
 # --- 🛠️ Helpers & Main API ---
@@ -231,8 +168,7 @@ def register_local_fonts():
                 emoji_font_name = font_name
             else:
                 text_font_names.append(font_name)
-        except Exception as e:
-            print(f"Could not register font {font_path.name}: {e}")
     if not text_font_names: text_font_names.append('Helvetica')
     return sorted(text_font_names), emoji_font_name
@@ -247,53 +183,134 @@ def create_pdf_preview(pdf_path: Path):
     """🏞️ Generates a PNG thumbnail for the first page of a PDF."""
     preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
     try:
-        doc = fitz.open(pdf_path); page = doc.load_page(0); pix = page.get_pixmap()
         pix.save(str(preview_path)); doc.close()
-        return str(preview_path)
     except: return None
-def generate_outputs_api(omni_files, omni_prompt, chat_model, transcribe_model, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
     """🚀 The main entry point that orchestrates the entire multi-modal generation process."""
     if not omni_prompt and not omni_files: raise gr.Error("Please provide a prompt or upload at least one file.")
     if not output_formats: raise gr.Error("Please select at least one output format.")
-    chat_deployment = AZURE_DEPLOYMENT_NAMES.get(chat_model)
-    transcribe_deployment = AZURE_DEPLOYMENT_NAMES.get(transcribe_model)
-    if not chat_deployment: raise gr.Error(f"Deployment for model '{chat_model}' not found in configuration.")
     shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
     OUTPUT_DIR.mkdir(); PREVIEW_DIR.mkdir()
-    # --- Step 1: Omni-Model Processing ---
     md_content = ""
-    # Process files first
     if omni_files:
-        # Check for multiple file types
-        file_paths = [Path(f.name) for f in omni_files]
-        extensions = {p.suffix.lower() for p in file_paths}
-        if '.md' in extensions:
-            md_content = "\n\n".join([p.read_text(encoding='utf-8') for p in file_paths if p.suffix.lower() == '.md'])
-        elif '.pdf' in extensions:
-             # For simplicity, we process only the first PDF if multiple are uploaded for OCR
-            pdf_file = next((f for f in omni_files if Path(f.name).suffix.lower() == '.pdf'), None)
-            ocr_prompt = omni_prompt if omni_prompt else "Extract all text from the following document pages."
-            md_content = process_pdf_input(pdf_file, ocr_prompt, chat_deployment, progress)
-        elif '.png' in extensions or '.jpg' in extensions or '.jpeg' in extensions:
-            image_file = next((f for f in omni_files if Path(f.name).suffix.lower() in ['.png', '.jpg', '.jpeg']), None)
-            md_content = process_image_input(image_file, omni_prompt, chat_deployment)
-        elif '.wav' in extensions or '.mp3' in extensions or '.m4a' in extensions:
-            if not transcribe_deployment: raise gr.Error(f"Deployment for model '{transcribe_model}' not found.")
-            audio_file = next((f for f in omni_files if Path(f.name).suffix.lower() in ['.wav', '.mp3', '.m4a']), None)
-            md_content = process_audio_input(audio_file, omni_prompt, chat_deployment, transcribe_deployment)
-    # If no files, process text prompt
     elif omni_prompt:
-        md_content = process_text_input(omni_prompt, chat_deployment)
-    if not md_content: raise gr.Error("Failed to generate source content from the provided input.")
-    # --- Step 2: Generate Selected Document Formats ---
-    generated_files = []
     for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
         time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
         if format_choice == "PDF":
@@ -302,50 +319,59 @@ def generate_outputs_api(omni_files, omni_prompt, chat_model, transcribe_model,
                     pagesize = LAYOUTS[layout_name]["size"]
                     final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
                     pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)
-                    filename = f"Document_{time_str}_{layout_name.replace(' ','-')}_{font_name}.pdf"
                     output_path = OUTPUT_DIR / filename
                     with open(output_path, "wb") as f: f.write(pdf_buffer.getvalue())
-                    generated_files.append(output_path)
         elif format_choice == "DOCX":
-            docx_doc = create_docx(md_content)
-            filename = f"Document_{time_str}.docx"
-            output_path = OUTPUT_DIR / filename
-            docx_doc.save(output_path); generated_files.append(output_path)
         elif format_choice == "XLSX":
-            xlsx_book = create_xlsx(md_content)
-            filename = f"Outline_{time_str}.xlsx"
-            output_path = OUTPUT_DIR / filename
-            xlsx_book.save(output_path); generated_files.append(output_path)
-    gallery_previews = [create_pdf_preview(p) for p in generated_files if p.suffix == '.pdf']
-    final_gallery = [g for g in gallery_previews if g]
-    return md_content, final_gallery, [str(p) for p in generated_files]
 # --- 🎨 Gradio UI Definition ---
 AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
 with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as demo:
-    gr.Markdown("# 🧠 Omni-Model Document Generator (PDF, DOCX, XLSX)")
-    gr.Markdown("Provide a prompt, or upload a Markdown, PDF, Image, or Audio file. The AI will process it, and you can generate documents from the result.")
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### ⚙️ Omni-Model Input")
-            chat_models = ["gpt-4o", "gpt-4.1", "gpt-4.1-mini", "gpt-4o-mini", "o1-mini", "o3-mini", "o4-mini"]
-            transcribe_models = ["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]
-            selected_chat_model = gr.Dropdown(choices=chat_models, label="Select Chat/Vision/Reasoning Model", value=chat_models[0])
-            selected_transcribe_model = gr.Dropdown(choices=transcribe_models, label="Select Transcription Model (for audio)", value=transcribe_models[0])
-            omni_prompt = gr.Textbox(label="Prompt", lines=3, placeholder="Ask a question, or provide instructions for a file...")
-            omni_files = gr.File(label="Upload File(s) (Optional)", file_count="multiple", file_types=["image", ".wav", ".mp3", ".md", ".pdf"])
             gr.Markdown("### 📄 Output Settings")
             output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
-            with gr.Accordion("PDF Customization", open=True):
                 num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
                 page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                 page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
@@ -357,13 +383,21 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as
         with gr.Column(scale=2):
             gr.Markdown("### 🤖 AI Response (Source for Documents)")
             ai_response_output = gr.Markdown(label="AI Generated Content")
-            gr.Markdown("### 🖼️ Final Documents")
-            gallery_output = gr.Gallery(label="PDF Previews", show_label=False, elem_id="gallery", columns=3, height="auto", object_fit="contain")
-            downloadable_files_output = gr.Files(label="Download Generated Files")
-    generate_btn.click(fn=generate_outputs_api,
-                       inputs=[omni_files, omni_prompt, selected_chat_model, selected_transcribe_model, output_formats, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider],
-                       outputs=[ai_response_output, gallery_output, downloadable_files_output])
 if __name__ == "__main__":
-    demo.launch()

 import base64
 from collections import defaultdict
 from PIL import Image
+import json
 # Document Generation Libs
 from docx import Document
 from reportlab.pdfbase import pdfmetrics
 from reportlab.pdfbase.ttfonts import TTFont
+# Media Libs
 import fitz  # PyMuPDF
 # --- Configuration & Setup ---
 CWD = Path.cwd()
 OUTPUT_DIR = CWD / "generated_outputs"
 PREVIEW_DIR = CWD / "previews"
+UPLOAD_DIR = CWD / "uploads"
 FONT_DIR = CWD
+# Create necessary directories
 OUTPUT_DIR.mkdir(exist_ok=True)
 PREVIEW_DIR.mkdir(exist_ok=True)
+UPLOAD_DIR.mkdir(exist_ok=True)
 LAYOUTS = {
     "A4 Portrait": {"size": A4},
     "Letter Landscape": {"size": landscape(letter)},
 }
 # --- ✍️ Document Generation Engines ---
 def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
     """📜 Translates Markdown text into a sequence of ReportLab flowables for PDF rendering."""
     styles = getSampleStyleSheet()
     bold_font = f"{font_name}-Bold" if font_name != "Helvetica" else "Helvetica-Bold"
+    style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10, leading=14)
+    style_h1 = ParagraphStyle('h1', fontName=bold_font, spaceBefore=12, fontSize=24, textColor=colors.HexColor("#1E3A8A"))
+    style_h2 = ParagraphStyle('h2', fontName=bold_font, spaceBefore=10, fontSize=18, textColor=colors.HexColor("#374151"))
+    style_h3 = ParagraphStyle('h3', fontName=bold_font, spaceBefore=8, fontSize=14, textColor=colors.HexColor("#4B5563"))
+    style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, padding=8)
     story, first_heading = [], True
     for line in markdown_text.split('\n'):
+        stripped_line = line.strip()
+        if not stripped_line:
+            story.append(Spacer(1, 0.1 * inch)); continue
+        # Determine the structural element and its style
+        content, style, extra_args = stripped_line, style_normal, {}
+        if stripped_line.startswith("# "):
             if not first_heading: story.append(PageBreak())
+            content, style, first_heading = stripped_line.lstrip('# '), style_h1, False
+        elif stripped_line.startswith("## "):
+            content, style = stripped_line.lstrip('## '), style_h2
+        elif stripped_line.startswith("### "):
+            content, style = stripped_line.lstrip('### '), style_h3
+        elif stripped_line.startswith(("- ", "* ")):
+            content, extra_args = stripped_line[2:], {'bulletText': '•'}
+        # Now, format the content string correctly for ReportLab
+        # Apply bold/italic first
+        formatted_content = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content))
+        # Then, apply the emoji font tags. This order is crucial.
         final_content = apply_emoji_font(formatted_content, emoji_font)
+        story.append(Paragraph(final_content, style, **extra_args))
     return story
+# --- 🔮 Virtual AI Omni-Model Functions ---
+def process_text_input(prompt):
+    """💬 Builds a canned Markdown reply that echoes the prompt; no model is called."""
+    reply_lines = [
+        "# Virtual AI Response",
+        "",
+        "**Your Prompt:**",
+        f"> {prompt}",
+        "",
+        "**Generated Content:**",
+        "- This is a simulated response for your text input.",
+        "- Here's an emoji: 😊",
+    ]
+    return "\n".join(reply_lines)
+def process_image_input(image_path, prompt):
+    """🖼️ Builds a canned Markdown "analysis" naming the image file and its suffix; the image is never opened."""
+    image = Path(image_path)
+    report = (
+        f"# Virtual AI Image Analysis: {image.name}\n\n"
+        f"**Your Prompt:**\n> {prompt}\n\n"
+        "**Generated Content:**\n"
+        "1. Simulated analysis of the uploaded image.\n"
+        f"2. File type appears to be `{image.suffix}`."
+    )
+    return report
+def process_audio_input(audio_path, prompt):
+    """🎤 Builds a canned Markdown transcription and summary for the named audio file; the audio is never read."""
+    clip_name = Path(audio_path).name
+    sections = (
+        f"# Virtual AI Audio Summary: {clip_name}",
+        f"**Your Prompt:**\n> {prompt}",
+        "**Simulated Transcription:**\n> \"This is a test of the emergency broadcast system.\"",
+        "**Generated Summary:**\nThe audio is a test broadcast.",
+    )
+    # Sections are separated by one blank line, exactly as in the single-string form.
+    return "\n\n".join(sections)
+def process_pdf_input(pdf_path, prompt, progress):
+    """📄 Builds a canned Markdown "OCR" result for the named PDF and reports two progress steps.
+
+    `progress` is called as progress(fraction, desc=...); the PDF itself is never opened.
+    """
+    # Report the halfway mark before any text is assembled, then completion afterwards.
+    progress(0.5, desc="Simulating PDF page processing...")
+    document_name = Path(pdf_path).name
+    simulated_pages = (
+        "- **Page 1:** Simulated text from the first page.\n"
+        "- **Page 2:** Simulated text from the second page."
+    )
+    ocr_text = (
+        f"# Virtual AI OCR of: {document_name}\n\n"
+        f"**Your Prompt:**\n> {prompt}\n\n"
+        f"**Extracted Content (Simulated):**\n{simulated_pages}"
+    )
+    progress(1.0, desc="PDF OCR Simulation Complete!")
+    return ocr_text
 # --- 🛠️ Helpers & Main API ---
                 emoji_font_name = font_name
             else:
                 text_font_names.append(font_name)
+        except: pass
     if not text_font_names: text_font_names.append('Helvetica')
     return sorted(text_font_names), emoji_font_name
     """🏞️ Generates a PNG thumbnail for the first page of a PDF."""
     preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
     try:
+        doc = fitz.open(pdf_path); page = doc.load_page(0); pix = page.get_pixmap(dpi=96)
         pix.save(str(preview_path)); doc.close()
+        return preview_path
     except: return None
+def build_file_explorer_html(generated_files, pdf_files_for_gallery):
+    """🗂️ Constructs the HTML/JS for the file explorer and PDF gallery.
+
+    Args:
+        generated_files: Path objects of every generated file; each becomes a download link.
+        pdf_files_for_gallery: Path objects of generated PDFs; each one for which
+            create_pdf_preview returns a preview gets a thumbnail in the gallery tab.
+
+    Returns:
+        One HTML string holding the <style> rules, the tab bar, both tab panels and the <script>.
+    """
+    from html import escape  # local import: names and paths are escaped before entering markup
+    file_icons = {".pdf": "📄", ".docx": "📝", ".xlsx": "📊"}
+    file_links = []
+    for file_path in generated_files:
+        icon = file_icons.get(file_path.suffix, '📎')
+        safe_name = escape(file_path.name)
+        file_links.append(f"""
+        <a href="/file={escape(str(file_path))}" class="file-link" download="{safe_name}">
+            <span class="file-icon">{icon}</span>
+            <span class="file-name">{safe_name}</span>
+        </a>
+        """)
+    file_explorer_html = "".join(file_links)
+    gallery_items = []
+    for pdf_path in pdf_files_for_gallery:
+        preview_path = create_pdf_preview(pdf_path)
+        if not preview_path:
+            continue  # no preview could be rendered for this PDF; leave it out of the gallery
+        with open(preview_path, "rb") as f:
+            img_base64 = base64.b64encode(f.read()).decode("utf-8")
+        gallery_items.append({
+            "preview_src": f"data:image/png;base64,{img_base64}",
+            "filename": escape(pdf_path.name),
+        })
+    gallery_html, gallery_tab_button = "", ""
+    if gallery_items:
+        # Each thumbnail already carries its image in `src` and its name in `data-filename`,
+        # so the click handler reads both from the element instead of embedding the whole
+        # base64 payload a second time inside the onclick attribute.
+        thumbs_html = "".join(
+            f'<img src="{item["preview_src"]}" class="thumbnail" data-filename="{item["filename"]}" '
+            'onclick="selectThumbnail(this, this.src, this.dataset.filename)">'
+            for item in gallery_items
+        )
+        gallery_html = f"""
+        <div class="gallery-container">
+            <div class="main-view">
+                <img id="main-image" src="{gallery_items[0]['preview_src']}" class="main-image">
+                <p id="main-filename">{gallery_items[0]['filename']}</p>
+            </div>
+            <div class="thumbnail-strip">{thumbs_html}</div>
+        </div>
+        """
+        # Built outside the page f-string: backslash-escaped quotes inside an f-string
+        # replacement field are a SyntaxError on Python versions before 3.12.
+        gallery_tab_button = '<button class="tab-button" onclick="openTab(event, \'gallery\')">🖼️ PDF Gallery</button>'
+    # NOTE(review): gr.HTML may not execute the inline <script> below; confirm in the
+    # browser that openTab/selectThumbnail are actually defined when the buttons are clicked.
+    page_html = f"""
+    <style>
+        .tabs {{ display: flex; border-bottom: 2px solid #ccc; }}
+        .tab-button {{ padding: 10px 15px; cursor: pointer; background: #f1f1f1; border: none; border-bottom: 2px solid transparent; outline: none; }}
+        .tab-button.active {{ background: #fff; border-top: 2px solid #007bff; border-left: 2px solid #ccc; border-right: 2px solid #ccc; border-bottom: 2px solid #fff; }}
+        .tab-content {{ display: none; padding: 15px; border: 1px solid #ccc; border-top: none; }}
+        .tab-content.active {{ display: block; }}
+        .file-explorer {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); gap: 10px; }}
+        .file-link {{ display: flex; align-items: center; padding: 10px; background: #f9f9f9; border-radius: 5px; text-decoration: none; color: #333; }}
+        .file-link:hover {{ background: #e9e9e9; }}
+        .file-icon {{ font-size: 2.5em; margin-right: 10px; }}
+        .gallery-container {{ display: flex; height: 500px; }}
+        .main-view {{ flex: 3; padding: 10px; display: flex; flex-direction: column; align-items: center; justify-content: center; }}
+        .main-image {{ max-width: 100%; max-height: 90%; }}
+        .thumbnail-strip {{ flex: 1; overflow-y: auto; padding: 5px; }}
+        .thumbnail {{ width: 100%; margin-bottom: 5px; cursor: pointer; border: 2px solid transparent; }}
+        .thumbnail.active {{ border-color: #007bff; }}
+    </style>
+    <div class="tabs">
+        <button class="tab-button active" onclick="openTab(event, 'explorer')">🗂️ File Explorer</button>
+        {gallery_tab_button}
+    </div>
+    <div id="explorer" class="tab-content active">
+        <div class="file-explorer">{file_explorer_html}</div>
+    </div>
+    <div id="gallery" class="tab-content">
+        {gallery_html}
+    </div>
+    <script>
+        function openTab(evt, tabName) {{
+            var i, tabcontent, tablinks;
+            tabcontent = document.getElementsByClassName("tab-content");
+            for (i = 0; i < tabcontent.length; i++) {{ tabcontent[i].style.display = "none"; }}
+            tablinks = document.getElementsByClassName("tab-button");
+            for (i = 0; i < tablinks.length; i++) {{ tablinks[i].className = tablinks[i].className.replace(" active", ""); }}
+            document.getElementById(tabName).style.display = "block";
+            evt.currentTarget.className += " active";
+        }}
+        const mainImage = document.getElementById('main-image');
+        const mainFilename = document.getElementById('main-filename');
+        const thumbnails = document.querySelectorAll('.thumbnail');
+        if (thumbnails.length > 0) thumbnails[0].classList.add('active');
+        function selectThumbnail(selectedThumb, imgSrc, filename) {{
+            mainImage.src = imgSrc; mainFilename.textContent = filename;
+            thumbnails.forEach(t => t.classList.remove('active'));
+            selectedThumb.classList.add('active');
+        }};
+    </script>
+    """
+    return page_html
+def generate_outputs_api(omni_files, omni_prompt, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
     """🚀 The main entry point that orchestrates the entire multi-modal generation process."""
     if not omni_prompt and not omni_files: raise gr.Error("Please provide a prompt or upload at least one file.")
     if not output_formats: raise gr.Error("Please select at least one output format.")
     shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
     OUTPUT_DIR.mkdir(); PREVIEW_DIR.mkdir()
     md_content = ""
     if omni_files:
+        temp_paths = []
+        for f in omni_files:
+            temp_path = UPLOAD_DIR / Path(f.name).name
+            shutil.copyfile(f.name, temp_path)
+            temp_paths.append(temp_path)
+        file_path = temp_paths[0]
+        file_ext = file_path.suffix.lower()
+        if file_ext == '.md': md_content = "\n\n".join([p.read_text(encoding='utf-8') for p in temp_paths if p.suffix.lower() == '.md'])
+        elif file_ext == '.pdf': md_content = process_pdf_input(file_path, omni_prompt or "Extract text", progress)
+        elif file_ext in ['.png', '.jpg', '.jpeg']: md_content = process_image_input(file_path, omni_prompt or "Describe image")
+        elif file_ext in ['.wav', '.mp3']: md_content = process_audio_input(file_path, omni_prompt or "Summarize transcription")
     elif omni_prompt:
+        md_content = process_text_input(omni_prompt)
+    if not md_content: raise gr.Error("Failed to generate source content.")
+    generated_files, pdf_files_for_gallery = [], []
     for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
         time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
         if format_choice == "PDF":
                     pagesize = LAYOUTS[layout_name]["size"]
                     final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
                     pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)
+                    filename = f"Document_{time_str}.pdf"
                     output_path = OUTPUT_DIR / filename
                     with open(output_path, "wb") as f: f.write(pdf_buffer.getvalue())
+                    generated_files.append(output_path); pdf_files_for_gallery.append(output_path)
         elif format_choice == "DOCX":
+            doc = create_docx(md_content); filename = f"Document_{time_str}.docx"
+            output_path = OUTPUT_DIR / filename; doc.save(output_path); generated_files.append(output_path)
         elif format_choice == "XLSX":
+            book = create_xlsx(md_content); filename = f"Outline_{time_str}.xlsx"
+            output_path = OUTPUT_DIR / filename; book.save(output_path); generated_files.append(output_path)
+    final_html_output = build_file_explorer_html(generated_files, pdf_files_for_gallery)
+    return md_content, final_html_output
 # --- 🎨 Gradio UI Definition ---
 AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
+SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons
+1. 📜 **Introduction**
+   - **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
+   - **Usage**: A guide for learning and storytelling across traditions. ️
+   - **Themes**: Justice ⚖️, faith 🙏, hubris 🏛️, redemption ✨, cosmic order 🌌.
+# ⚔️ Arthurian Legends
+ - **Merlin, Morgan le Fay, Arthur**: Mentor 🧙, rival 🧙‍♀️, son 👑.
+ - **Relation**: Family tests loyalty 🤝.
+ - **Lesson**: Honor 🎖️ vs. betrayal 🗡️.
+"""
 with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as demo:
+    gr.Markdown("# 🧠 Omni-Model Document Generator")
+    gr.Markdown("Provide a prompt, or upload a file (MD, PDF, Image, Audio). A virtual AI will process it, and you can generate documents from the result.")
     with gr.Row():
         with gr.Column(scale=1):
+            with gr.Tabs():
+                with gr.TabItem("💬 Text"):
+                    text_prompt = gr.Textbox(label="Prompt", lines=5, placeholder="Ask a question or provide instructions...")
+                with gr.TabItem("🖼️ Image"):
+                    image_prompt = gr.Textbox(label="Image Prompt", lines=2, placeholder="e.g., Describe this picture")
+                    image_file = gr.File(label="Upload Image", file_types=["image"])
+                with gr.TabItem("🎤 Audio"):
+                    audio_prompt = gr.Textbox(label="Audio Prompt", lines=2, placeholder="e.g., Summarize this audio")
+                    audio_file = gr.File(label="Upload Audio", file_types=[".wav", ".mp3"])
+                with gr.TabItem("📄 Document"):
+                    doc_prompt = gr.Textbox(label="Document Prompt", lines=2, placeholder="e.g., Extract text from this PDF")
+                    doc_file = gr.File(label="Upload MD or PDF", file_types=[".md", ".pdf"])
             gr.Markdown("### 📄 Output Settings")
             output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
+            with gr.Accordion("PDF Customization", open=False):
                 num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
                 page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
                 page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
         with gr.Column(scale=2):
             gr.Markdown("### 🤖 AI Response (Source for Documents)")
             ai_response_output = gr.Markdown(label="AI Generated Content")
+            gr.Markdown("### 🗂️ Generated Files")
+            file_explorer_output = gr.HTML(label="File Explorer & Gallery")
+    def master_process(p1, p2, p3, p4, f1, f2, f3, f4, *args):
+        """🧭 Routes the inputs of the four tabs to generate_outputs_api.
+
+        p1..p4 are the Text/Image/Audio/Document prompts and f2..f4 the matching
+        uploads. The Text tab has no upload component, so the click wiring below
+        fills the f1 slot with the text prompt a second time; f1 is therefore
+        never treated as a file. Treating it as one sent a plain string into the
+        upload branch of generate_outputs_api, which reads `.name` from every
+        entry, and made the text-only route unreachable.
+
+        Remaining positional args (output formats, layouts, fonts, sliders) are
+        forwarded unchanged. Raises gr.Error when no tab holds usable input.
+        """
+        uploads = (
+            (f2, p2, "Describe this image"),
+            (f3, p3, "Summarize this audio"),
+            (f4, p4, "Process this document"),
+        )
+        # An uploaded file wins over a bare text prompt; tabs are checked in UI order.
+        for upload, prompt, fallback_prompt in uploads:
+            if upload:
+                return generate_outputs_api([upload], prompt or fallback_prompt, *args)
+        if p1:
+            return generate_outputs_api(None, p1, *args)
+        raise gr.Error("Please provide an input in one of the tabs.")
+    generate_btn.click(fn=master_process,
+                       inputs=[text_prompt, image_prompt, audio_prompt, doc_prompt, text_prompt, image_file, audio_file, doc_file, output_formats, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider],
+                       outputs=[ai_response_output, file_explorer_output])
 if __name__ == "__main__":
+    demo.launch(share=True)