Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import io
|
|
8 |
import base64
|
9 |
from collections import defaultdict
|
10 |
from PIL import Image
|
|
|
11 |
|
12 |
# Document Generation Libs
|
13 |
from docx import Document
|
@@ -20,17 +21,20 @@ from reportlab.lib.units import inch
|
|
20 |
from reportlab.pdfbase import pdfmetrics
|
21 |
from reportlab.pdfbase.ttfonts import TTFont
|
22 |
|
23 |
-
#
|
24 |
-
from openai import AzureOpenAI
|
25 |
import fitz # PyMuPDF
|
26 |
|
27 |
# --- Configuration & Setup ---
|
28 |
CWD = Path.cwd()
|
29 |
OUTPUT_DIR = CWD / "generated_outputs"
|
30 |
PREVIEW_DIR = CWD / "previews"
|
|
|
31 |
FONT_DIR = CWD
|
|
|
|
|
32 |
OUTPUT_DIR.mkdir(exist_ok=True)
|
33 |
PREVIEW_DIR.mkdir(exist_ok=True)
|
|
|
34 |
|
35 |
LAYOUTS = {
|
36 |
"A4 Portrait": {"size": A4},
|
@@ -39,40 +43,6 @@ LAYOUTS = {
|
|
39 |
"Letter Landscape": {"size": landscape(letter)},
|
40 |
}
|
41 |
|
42 |
-
# 🧠 Initialize Azure OpenAI Client
|
43 |
-
# NOTE: This requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY in your environment.
|
44 |
-
try:
|
45 |
-
client = AzureOpenAI(
|
46 |
-
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
47 |
-
api_version="2024-05-01-preview",
|
48 |
-
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
|
49 |
-
)
|
50 |
-
AZURE_CLIENT_AVAILABLE = True
|
51 |
-
except Exception as e:
|
52 |
-
print("Warning: Azure OpenAI client could not be initialized. Text generation will use dummy data.")
|
53 |
-
print(f"Error: {e}")
|
54 |
-
client = None
|
55 |
-
AZURE_CLIENT_AVAILABLE = False
|
56 |
-
|
57 |
-
# 📖 Map UI model names to your actual Azure deployment names.
|
58 |
-
# YOU MUST CHANGE THESE DEPLOYMENT NAMES to match your Azure setup.
|
59 |
-
AZURE_DEPLOYMENT_NAMES = {
|
60 |
-
# Chat / Vision Models
|
61 |
-
"gpt-4o": "your-gpt-4o-deployment-name",
|
62 |
-
"gpt-4.1": "your-gpt-4.1-deployment-name",
|
63 |
-
"gpt-4.1-mini": "your-gpt-4.1-mini-deployment-name",
|
64 |
-
"gpt-4o-mini": "your-gpt-4o-mini-deployment-name",
|
65 |
-
"gpt-4o-realtime-preview": "your-gpt-4o-realtime-deployment-name",
|
66 |
-
# Reasoning Models
|
67 |
-
"o1-mini": "your-o1-mini-deployment-name",
|
68 |
-
"o3-mini": "your-o3-mini-deployment-name",
|
69 |
-
"o4-mini": "your-o4-mini-deployment-name",
|
70 |
-
# Transcription Models
|
71 |
-
"gpt-4o-transcribe": "your-gpt-4o-transcribe-deployment",
|
72 |
-
"gpt-4o-mini-transcribe": "your-gpt-4o-mini-transcribe-deployment",
|
73 |
-
}
|
74 |
-
|
75 |
-
|
76 |
# --- ✍️ Document Generation Engines ---
|
77 |
|
78 |
def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
|
@@ -125,94 +95,61 @@ def markdown_to_story(markdown_text: str, font_name: str, emoji_font: str):
|
|
125 |
"""📜 Translates Markdown text into a sequence of ReportLab flowables for PDF rendering."""
|
126 |
styles = getSampleStyleSheet()
|
127 |
bold_font = f"{font_name}-Bold" if font_name != "Helvetica" else "Helvetica-Bold"
|
128 |
-
style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10)
|
129 |
-
style_h1 = ParagraphStyle('h1', fontName=bold_font, spaceBefore=12, fontSize=24)
|
|
|
|
|
|
|
|
|
130 |
story, first_heading = [], True
|
131 |
for line in markdown_text.split('\n'):
|
132 |
-
|
133 |
-
if
|
|
|
|
|
|
|
|
|
|
|
134 |
if not first_heading: story.append(PageBreak())
|
135 |
-
content, style, first_heading =
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
final_content = apply_emoji_font(formatted_content, emoji_font)
|
138 |
-
|
|
|
|
|
139 |
return story
|
140 |
|
141 |
|
142 |
-
# --- 🔮 Omni-Model
|
143 |
-
|
144 |
-
def process_text_input(prompt, model_deployment_name):
|
145 |
-
"""💬 Sends a text prompt to the Azure OpenAI model and gets a response."""
|
146 |
-
if not AZURE_CLIENT_AVAILABLE: return "Azure OpenAI client not configured. This is dummy text."
|
147 |
-
completion = client.chat.completions.create(
|
148 |
-
model=model_deployment_name,
|
149 |
-
messages=[{"role": "user", "content": prompt}]
|
150 |
-
)
|
151 |
-
return completion.choices[0].message.content
|
152 |
-
|
153 |
-
def process_image_input(image_file, prompt, model_deployment_name):
|
154 |
-
"""🖼️ Encodes an image and sends it with a prompt to the Azure OpenAI model."""
|
155 |
-
if not AZURE_CLIENT_AVAILABLE: return "Azure OpenAI client not configured. This is a dummy image description."
|
156 |
-
with Image.open(image_file.name) as img:
|
157 |
-
with io.BytesIO() as output:
|
158 |
-
img.save(output, format="PNG")
|
159 |
-
base64_image = base64.b64encode(output.getvalue()).decode("utf-8")
|
160 |
-
|
161 |
-
response = client.chat.completions.create(
|
162 |
-
model=model_deployment_name,
|
163 |
-
messages=[{"role": "user", "content": [
|
164 |
-
{"type": "text", "text": prompt},
|
165 |
-
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
|
166 |
-
]}]
|
167 |
-
)
|
168 |
-
return response.choices[0].message.content
|
169 |
-
|
170 |
-
def process_audio_input(audio_file, prompt, chat_model_deployment, transcribe_model_deployment):
|
171 |
-
"""🎤 Transcribes audio and sends the text with a prompt to the Azure OpenAI model."""
|
172 |
-
if not AZURE_CLIENT_AVAILABLE: return "Azure OpenAI client not configured. This is a dummy audio summary."
|
173 |
-
with open(audio_file.name, "rb") as f:
|
174 |
-
transcription = client.audio.transcriptions.create(
|
175 |
-
model=transcribe_model_deployment,
|
176 |
-
file=f
|
177 |
-
).text
|
178 |
-
|
179 |
-
full_prompt = f"{prompt}\n\nAudio Transcription:\n{transcription}"
|
180 |
-
return process_text_input(full_prompt, chat_model_deployment)
|
181 |
|
182 |
-
def
|
183 |
-
"""
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
# Process pages in pairs
|
190 |
-
for i in progress.tqdm(range(0, len(doc), 2), desc="Performing PDF OCR"):
|
191 |
-
page_images = []
|
192 |
-
messages = [{"type": "text", "text": prompt}]
|
193 |
-
|
194 |
-
# Get first page of the pair
|
195 |
-
page1 = doc.load_page(i)
|
196 |
-
pix1 = page1.get_pixmap(dpi=150)
|
197 |
-
img_bytes1 = pix1.tobytes("png")
|
198 |
-
base64_image1 = base64.b64encode(img_bytes1).decode("utf-8")
|
199 |
-
messages.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image1}"}})
|
200 |
-
|
201 |
-
# Get second page if it exists
|
202 |
-
if i + 1 < len(doc):
|
203 |
-
page2 = doc.load_page(i + 1)
|
204 |
-
pix2 = page2.get_pixmap(dpi=150)
|
205 |
-
img_bytes2 = pix2.tobytes("png")
|
206 |
-
base64_image2 = base64.b64encode(img_bytes2).decode("utf-8")
|
207 |
-
messages.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image2}"}})
|
208 |
-
|
209 |
-
response = client.chat.completions.create(
|
210 |
-
model=model_deployment_name,
|
211 |
-
messages=[{"role": "user", "content": messages}]
|
212 |
-
)
|
213 |
-
all_extracted_text.append(response.choices[0].message.content)
|
214 |
|
215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
|
217 |
|
218 |
# --- 🛠️ Helpers & Main API ---
|
@@ -231,8 +168,7 @@ def register_local_fonts():
|
|
231 |
emoji_font_name = font_name
|
232 |
else:
|
233 |
text_font_names.append(font_name)
|
234 |
-
except
|
235 |
-
print(f"Could not register font {font_path.name}: {e}")
|
236 |
if not text_font_names: text_font_names.append('Helvetica')
|
237 |
return sorted(text_font_names), emoji_font_name
|
238 |
|
@@ -247,53 +183,134 @@ def create_pdf_preview(pdf_path: Path):
|
|
247 |
"""🏞️ Generates a PNG thumbnail for the first page of a PDF."""
|
248 |
preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
|
249 |
try:
|
250 |
-
doc = fitz.open(pdf_path); page = doc.load_page(0); pix = page.get_pixmap()
|
251 |
pix.save(str(preview_path)); doc.close()
|
252 |
-
return
|
253 |
except: return None
|
254 |
|
255 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
"""🚀 The main entry point that orchestrates the entire multi-modal generation process."""
|
257 |
if not omni_prompt and not omni_files: raise gr.Error("Please provide a prompt or upload at least one file.")
|
258 |
if not output_formats: raise gr.Error("Please select at least one output format.")
|
259 |
|
260 |
-
chat_deployment = AZURE_DEPLOYMENT_NAMES.get(chat_model)
|
261 |
-
transcribe_deployment = AZURE_DEPLOYMENT_NAMES.get(transcribe_model)
|
262 |
-
if not chat_deployment: raise gr.Error(f"Deployment for model '{chat_model}' not found in configuration.")
|
263 |
-
|
264 |
shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
|
265 |
OUTPUT_DIR.mkdir(); PREVIEW_DIR.mkdir()
|
266 |
|
267 |
-
# --- Step 1: Omni-Model Processing ---
|
268 |
md_content = ""
|
269 |
-
# Process files first
|
270 |
if omni_files:
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
elif '.png'
|
283 |
-
|
284 |
-
md_content = process_image_input(image_file, omni_prompt, chat_deployment)
|
285 |
-
elif '.wav' in extensions or '.mp3' in extensions or '.m4a' in extensions:
|
286 |
-
if not transcribe_deployment: raise gr.Error(f"Deployment for model '{transcribe_model}' not found.")
|
287 |
-
audio_file = next((f for f in omni_files if Path(f.name).suffix.lower() in ['.wav', '.mp3', '.m4a']), None)
|
288 |
-
md_content = process_audio_input(audio_file, omni_prompt, chat_deployment, transcribe_deployment)
|
289 |
-
# If no files, process text prompt
|
290 |
elif omni_prompt:
|
291 |
-
md_content = process_text_input(omni_prompt
|
292 |
|
293 |
-
if not md_content: raise gr.Error("Failed to generate source content
|
294 |
|
295 |
-
|
296 |
-
generated_files = []
|
297 |
for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
|
298 |
time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
|
299 |
if format_choice == "PDF":
|
@@ -302,50 +319,59 @@ def generate_outputs_api(omni_files, omni_prompt, chat_model, transcribe_model,
|
|
302 |
pagesize = LAYOUTS[layout_name]["size"]
|
303 |
final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
|
304 |
pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)
|
305 |
-
filename = f"Document_{time_str}
|
306 |
output_path = OUTPUT_DIR / filename
|
307 |
with open(output_path, "wb") as f: f.write(pdf_buffer.getvalue())
|
308 |
-
generated_files.append(output_path)
|
309 |
elif format_choice == "DOCX":
|
310 |
-
|
311 |
-
|
312 |
-
output_path = OUTPUT_DIR / filename
|
313 |
-
docx_doc.save(output_path); generated_files.append(output_path)
|
314 |
elif format_choice == "XLSX":
|
315 |
-
|
316 |
-
|
317 |
-
output_path = OUTPUT_DIR / filename
|
318 |
-
xlsx_book.save(output_path); generated_files.append(output_path)
|
319 |
|
320 |
-
|
321 |
-
final_gallery = [g for g in gallery_previews if g]
|
322 |
|
323 |
-
return md_content,
|
324 |
|
325 |
# --- 🎨 Gradio UI Definition ---
|
326 |
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
|
328 |
with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as demo:
|
329 |
-
gr.Markdown("# 🧠 Omni-Model Document Generator
|
330 |
-
gr.Markdown("Provide a prompt, or upload a
|
331 |
|
332 |
with gr.Row():
|
333 |
with gr.Column(scale=1):
|
334 |
-
gr.
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
|
|
|
|
344 |
|
345 |
gr.Markdown("### 📄 Output Settings")
|
346 |
output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
|
347 |
|
348 |
-
with gr.Accordion("PDF Customization", open=
|
349 |
num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
|
350 |
page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
|
351 |
page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
|
@@ -357,13 +383,21 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as
|
|
357 |
with gr.Column(scale=2):
|
358 |
gr.Markdown("### 🤖 AI Response (Source for Documents)")
|
359 |
ai_response_output = gr.Markdown(label="AI Generated Content")
|
360 |
-
gr.Markdown("###
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
|
368 |
if __name__ == "__main__":
|
369 |
-
demo.launch()
|
|
|
8 |
import base64
|
9 |
from collections import defaultdict
|
10 |
from PIL import Image
|
11 |
+
import json
|
12 |
|
13 |
# Document Generation Libs
|
14 |
from docx import Document
|
|
|
21 |
from reportlab.pdfbase import pdfmetrics
|
22 |
from reportlab.pdfbase.ttfonts import TTFont
|
23 |
|
24 |
+
# Media Libs
|
|
|
25 |
import fitz # PyMuPDF
|
26 |
|
27 |
# --- Configuration & Setup ---
|
28 |
CWD = Path.cwd()
|
29 |
OUTPUT_DIR = CWD / "generated_outputs"
|
30 |
PREVIEW_DIR = CWD / "previews"
|
31 |
+
UPLOAD_DIR = CWD / "uploads"
|
32 |
FONT_DIR = CWD
|
33 |
+
|
34 |
+
# Create necessary directories
|
35 |
OUTPUT_DIR.mkdir(exist_ok=True)
|
36 |
PREVIEW_DIR.mkdir(exist_ok=True)
|
37 |
+
UPLOAD_DIR.mkdir(exist_ok=True)
|
38 |
|
39 |
LAYOUTS = {
|
40 |
"A4 Portrait": {"size": A4},
|
|
|
43 |
"Letter Landscape": {"size": landscape(letter)},
|
44 |
}
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
# --- ✍️ Document Generation Engines ---
|
47 |
|
48 |
def create_pdf(md_content, font_name, emoji_font, pagesize, num_columns):
|
|
|
95 |
"""📜 Translates Markdown text into a sequence of ReportLab flowables for PDF rendering."""
|
96 |
styles = getSampleStyleSheet()
|
97 |
bold_font = f"{font_name}-Bold" if font_name != "Helvetica" else "Helvetica-Bold"
|
98 |
+
style_normal = ParagraphStyle('BodyText', fontName=font_name, spaceAfter=6, fontSize=10, leading=14)
|
99 |
+
style_h1 = ParagraphStyle('h1', fontName=bold_font, spaceBefore=12, fontSize=24, textColor=colors.HexColor("#1E3A8A"))
|
100 |
+
style_h2 = ParagraphStyle('h2', fontName=bold_font, spaceBefore=10, fontSize=18, textColor=colors.HexColor("#374151"))
|
101 |
+
style_h3 = ParagraphStyle('h3', fontName=bold_font, spaceBefore=8, fontSize=14, textColor=colors.HexColor("#4B5563"))
|
102 |
+
style_code = ParagraphStyle('Code', fontName='Courier', backColor=colors.whitesmoke, textColor=colors.darkred, borderWidth=1, borderColor=colors.lightgrey, padding=8)
|
103 |
+
|
104 |
story, first_heading = [], True
|
105 |
for line in markdown_text.split('\n'):
|
106 |
+
stripped_line = line.strip()
|
107 |
+
if not stripped_line:
|
108 |
+
story.append(Spacer(1, 0.1 * inch)); continue
|
109 |
+
|
110 |
+
# Determine the structural element and its style
|
111 |
+
content, style, extra_args = stripped_line, style_normal, {}
|
112 |
+
if stripped_line.startswith("# "):
|
113 |
if not first_heading: story.append(PageBreak())
|
114 |
+
content, style, first_heading = stripped_line.lstrip('# '), style_h1, False
|
115 |
+
elif stripped_line.startswith("## "):
|
116 |
+
content, style = stripped_line.lstrip('## '), style_h2
|
117 |
+
elif stripped_line.startswith("### "):
|
118 |
+
content, style = stripped_line.lstrip('### '), style_h3
|
119 |
+
elif stripped_line.startswith(("- ", "* ")):
|
120 |
+
content, extra_args = stripped_line[2:], {'bulletText': '•'}
|
121 |
+
|
122 |
+
# Now, format the content string correctly for ReportLab
|
123 |
+
# Apply bold/italic first
|
124 |
+
formatted_content = re.sub(r'_(.*?)_', r'<i>\1</i>', re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', content))
|
125 |
+
# Then, apply the emoji font tags. This order is crucial.
|
126 |
final_content = apply_emoji_font(formatted_content, emoji_font)
|
127 |
+
|
128 |
+
story.append(Paragraph(final_content, style, **extra_args))
|
129 |
+
|
130 |
return story
|
131 |
|
132 |
|
133 |
+
# --- 🔮 Virtual AI Omni-Model Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
+
def process_text_input(prompt):
    """💬 Simulates an AI response to a text prompt.

    Args:
        prompt: The user's text prompt; echoed back inside the reply.

    Returns:
        A Markdown-formatted string simulating an AI-generated answer.
    """
    return f"# Virtual AI Response\n\n**Your Prompt:**\n> {prompt}\n\n**Generated Content:**\n- This is a simulated response for your text input.\n- Here's an emoji: 😊"
|
138 |
+
|
139 |
+
def process_image_input(image_path, prompt):
    """🖼️ Simulates an AI description of an image.

    Args:
        image_path: Path-like pointing at the uploaded image; only its
            name and suffix are used (the pixels are never read).
        prompt: The user's prompt, echoed back in the reply.

    Returns:
        A Markdown-formatted string simulating an image analysis.
    """
    return f"# Virtual AI Image Analysis: {Path(image_path).name}\n\n**Your Prompt:**\n> {prompt}\n\n**Generated Content:**\n1. Simulated analysis of the uploaded image.\n2. File type appears to be `{Path(image_path).suffix}`."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
+
def process_audio_input(audio_path, prompt):
    """🎤 Simulates AI transcription and summarization of an audio file.

    Args:
        audio_path: Path-like pointing at the uploaded audio; only its
            name is used (the audio is never decoded).
        prompt: The user's prompt, echoed back in the reply.

    Returns:
        A Markdown-formatted string with a canned transcription and summary.
    """
    return f"# Virtual AI Audio Summary: {Path(audio_path).name}\n\n**Your Prompt:**\n> {prompt}\n\n**Simulated Transcription:**\n> \"This is a test of the emergency broadcast system.\"\n\n**Generated Summary:**\nThe audio is a test broadcast."
|
146 |
+
|
147 |
+
def process_pdf_input(pdf_path, prompt, progress):
    """📄 Simulates AI-powered OCR of a PDF document.

    Args:
        pdf_path: Path-like pointing at the uploaded PDF; only its name
            is used (the document is never opened).
        prompt: The user's prompt, echoed back in the reply.
        progress: Callable accepting ``(fraction, desc=...)`` — a Gradio
            progress tracker or any compatible callback.

    Returns:
        A Markdown-formatted string simulating per-page OCR output.
    """
    progress(0.5, desc="Simulating PDF page processing...")
    ocr_text = f"# Virtual AI OCR of: {Path(pdf_path).name}\n\n**Your Prompt:**\n> {prompt}\n\n**Extracted Content (Simulated):**\n- **Page 1:** Simulated text from the first page.\n- **Page 2:** Simulated text from the second page."
    progress(1.0, desc="PDF OCR Simulation Complete!")
    return ocr_text
|
153 |
|
154 |
|
155 |
# --- 🛠️ Helpers & Main API ---
|
|
|
168 |
emoji_font_name = font_name
|
169 |
else:
|
170 |
text_font_names.append(font_name)
|
171 |
+
except: pass
|
|
|
172 |
if not text_font_names: text_font_names.append('Helvetica')
|
173 |
return sorted(text_font_names), emoji_font_name
|
174 |
|
|
|
183 |
"""🏞️ Generates a PNG thumbnail for the first page of a PDF."""
|
184 |
preview_path = PREVIEW_DIR / f"{pdf_path.stem}.png"
|
185 |
try:
|
186 |
+
doc = fitz.open(pdf_path); page = doc.load_page(0); pix = page.get_pixmap(dpi=96)
|
187 |
pix.save(str(preview_path)); doc.close()
|
188 |
+
return preview_path
|
189 |
except: return None
|
190 |
|
191 |
+
def build_file_explorer_html(generated_files, pdf_files_for_gallery):
    """🗂️ Constructs the HTML/JS for the file explorer and PDF gallery.

    Args:
        generated_files: iterable of Path objects for the generated
            documents; each needs ``.suffix`` and ``.name``.
        pdf_files_for_gallery: iterable of Path objects for PDFs whose
            first page should be rendered as a gallery thumbnail via
            create_pdf_preview().

    Returns:
        A self-contained HTML string (inline CSS + JS) with a tabbed
        file explorer and, when at least one preview rendered, a PDF
        thumbnail gallery.
    """
    file_icons = {".pdf": "📄", ".docx": "📝", ".xlsx": "📊"}
    # Build one download anchor per generated file; join once instead of
    # repeated string concatenation.
    file_links = []
    for file_path in generated_files:
        icon = file_icons.get(file_path.suffix, '📎')
        file_links.append(f"""
        <a href="/file={file_path}" class="file-link" download="{file_path.name}">
            <span class="file-icon">{icon}</span>
            <span class="file-name">{file_path.name}</span>
        </a>
        """)
    file_explorer_html = "".join(file_links)

    # Embed previews as base64 data URIs so the gallery needs no file routes.
    gallery_items = []
    for pdf_path in pdf_files_for_gallery:
        preview_path = create_pdf_preview(pdf_path)
        if preview_path:
            with open(preview_path, "rb") as f:
                img_base64 = base64.b64encode(f.read()).decode("utf-8")
            gallery_items.append({
                "preview_src": f"data:image/png;base64,{img_base64}",
                "filename": pdf_path.name,
            })

    gallery_html = ""
    if gallery_items:
        thumbs_html = "".join(
            f'<img src="{item["preview_src"]}" class="thumbnail" onclick="selectThumbnail(this, \'{item["preview_src"]}\', \'{item["filename"]}\')">'
            for item in gallery_items
        )
        gallery_html = f"""
        <div class="gallery-container">
            <div class="main-view">
                <img id="main-image" src="{gallery_items[0]['preview_src']}" class="main-image">
                <p id="main-filename">{gallery_items[0]['filename']}</p>
            </div>
            <div class="thumbnail-strip">{thumbs_html}</div>
        </div>
        """

    # Hoisted out of the f-string below: expressions containing backslashes
    # inside f-strings are a SyntaxError before Python 3.12.
    gallery_tab_button = (
        '<button class="tab-button" onclick="openTab(event, \'gallery\')">🖼️ PDF Gallery</button>'
        if gallery_items else ''
    )

    html = f"""
    <style>
        .tabs {{ display: flex; border-bottom: 2px solid #ccc; }}
        .tab-button {{ padding: 10px 15px; cursor: pointer; background: #f1f1f1; border: none; border-bottom: 2px solid transparent; outline: none; }}
        .tab-button.active {{ background: #fff; border-top: 2px solid #007bff; border-left: 2px solid #ccc; border-right: 2px solid #ccc; border-bottom: 2px solid #fff; }}
        .tab-content {{ display: none; padding: 15px; border: 1px solid #ccc; border-top: none; }}
        .tab-content.active {{ display: block; }}
        .file-explorer {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); gap: 10px; }}
        .file-link {{ display: flex; align-items: center; padding: 10px; background: #f9f9f9; border-radius: 5px; text-decoration: none; color: #333; }}
        .file-link:hover {{ background: #e9e9e9; }}
        .file-icon {{ font-size: 2.5em; margin-right: 10px; }}
        .gallery-container {{ display: flex; height: 500px; }}
        .main-view {{ flex: 3; padding: 10px; display: flex; flex-direction: column; align-items: center; justify-content: center; }}
        .main-image {{ max-width: 100%; max-height: 90%; }}
        .thumbnail-strip {{ flex: 1; overflow-y: auto; padding: 5px; }}
        .thumbnail {{ width: 100%; margin-bottom: 5px; cursor: pointer; border: 2px solid transparent; }}
        .thumbnail.active {{ border-color: #007bff; }}
    </style>
    <div class="tabs">
        <button class="tab-button active" onclick="openTab(event, 'explorer')">🗂️ File Explorer</button>
        {gallery_tab_button}
    </div>
    <div id="explorer" class="tab-content active">
        <div class="file-explorer">{file_explorer_html}</div>
    </div>
    <div id="gallery" class="tab-content">
        {gallery_html}
    </div>
    <script>
        function openTab(evt, tabName) {{
            var i, tabcontent, tablinks;
            tabcontent = document.getElementsByClassName("tab-content");
            for (i = 0; i < tabcontent.length; i++) {{ tabcontent[i].style.display = "none"; }}
            tablinks = document.getElementsByClassName("tab-button");
            for (i = 0; i < tablinks.length; i++) {{ tablinks[i].className = tablinks[i].className.replace(" active", ""); }}
            document.getElementById(tabName).style.display = "block";
            evt.currentTarget.className += " active";
        }}

        const mainImage = document.getElementById('main-image');
        const mainFilename = document.getElementById('main-filename');
        const thumbnails = document.querySelectorAll('.thumbnail');
        if (thumbnails.length > 0) thumbnails[0].classList.add('active');

        function selectThumbnail(selectedThumb, imgSrc, filename) {{
            mainImage.src = imgSrc; mainFilename.textContent = filename;
            thumbnails.forEach(t => t.classList.remove('active'));
            selectedThumb.classList.add('active');
        }};
    </script>
    """
    return html
|
284 |
+
|
285 |
+
def generate_outputs_api(omni_files, omni_prompt, output_formats, layouts, fonts, num_columns, page_w_mult, page_h_mult, progress=gr.Progress(track_tqdm=True)):
|
286 |
"""🚀 The main entry point that orchestrates the entire multi-modal generation process."""
|
287 |
if not omni_prompt and not omni_files: raise gr.Error("Please provide a prompt or upload at least one file.")
|
288 |
if not output_formats: raise gr.Error("Please select at least one output format.")
|
289 |
|
|
|
|
|
|
|
|
|
290 |
shutil.rmtree(OUTPUT_DIR, ignore_errors=True); shutil.rmtree(PREVIEW_DIR, ignore_errors=True)
|
291 |
OUTPUT_DIR.mkdir(); PREVIEW_DIR.mkdir()
|
292 |
|
|
|
293 |
md_content = ""
|
|
|
294 |
if omni_files:
|
295 |
+
temp_paths = []
|
296 |
+
for f in omni_files:
|
297 |
+
temp_path = UPLOAD_DIR / Path(f.name).name
|
298 |
+
shutil.copyfile(f.name, temp_path)
|
299 |
+
temp_paths.append(temp_path)
|
300 |
+
|
301 |
+
file_path = temp_paths[0]
|
302 |
+
file_ext = file_path.suffix.lower()
|
303 |
+
|
304 |
+
if file_ext == '.md': md_content = "\n\n".join([p.read_text(encoding='utf-8') for p in temp_paths if p.suffix.lower() == '.md'])
|
305 |
+
elif file_ext == '.pdf': md_content = process_pdf_input(file_path, omni_prompt or "Extract text", progress)
|
306 |
+
elif file_ext in ['.png', '.jpg', '.jpeg']: md_content = process_image_input(file_path, omni_prompt or "Describe image")
|
307 |
+
elif file_ext in ['.wav', '.mp3']: md_content = process_audio_input(file_path, omni_prompt or "Summarize transcription")
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
elif omni_prompt:
|
309 |
+
md_content = process_text_input(omni_prompt)
|
310 |
|
311 |
+
if not md_content: raise gr.Error("Failed to generate source content.")
|
312 |
|
313 |
+
generated_files, pdf_files_for_gallery = [], []
|
|
|
314 |
for format_choice in progress.tqdm(output_formats, desc="Generating Formats"):
|
315 |
time_str = datetime.datetime.now().strftime('%m-%d-%a_%I%M%p').upper()
|
316 |
if format_choice == "PDF":
|
|
|
319 |
pagesize = LAYOUTS[layout_name]["size"]
|
320 |
final_pagesize = (pagesize[0] * page_w_mult, pagesize[1] * page_h_mult)
|
321 |
pdf_buffer = create_pdf(md_content, font_name, EMOJI_FONT_NAME, final_pagesize, num_columns)
|
322 |
+
filename = f"Document_{time_str}.pdf"
|
323 |
output_path = OUTPUT_DIR / filename
|
324 |
with open(output_path, "wb") as f: f.write(pdf_buffer.getvalue())
|
325 |
+
generated_files.append(output_path); pdf_files_for_gallery.append(output_path)
|
326 |
elif format_choice == "DOCX":
|
327 |
+
doc = create_docx(md_content); filename = f"Document_{time_str}.docx"
|
328 |
+
output_path = OUTPUT_DIR / filename; doc.save(output_path); generated_files.append(output_path)
|
|
|
|
|
329 |
elif format_choice == "XLSX":
|
330 |
+
book = create_xlsx(md_content); filename = f"Outline_{time_str}.xlsx"
|
331 |
+
output_path = OUTPUT_DIR / filename; book.save(output_path); generated_files.append(output_path)
|
|
|
|
|
332 |
|
333 |
+
final_html_output = build_file_explorer_html(generated_files, pdf_files_for_gallery)
|
|
|
334 |
|
335 |
+
return md_content, final_html_output
|
336 |
|
337 |
# --- 🎨 Gradio UI Definition ---
|
338 |
AVAILABLE_FONTS, EMOJI_FONT_NAME = register_local_fonts()
|
339 |
+
# Sample Markdown shown to users as example input/output content.
# NOTE(review): the inner indentation of this literal was lost in the
# source scrape and is reconstructed here — confirm against the original.
SAMPLE_MARKDOWN = """# Deities Guide: Mythology and Moral Lessons

1. 📜 **Introduction**
   - **Purpose**: Explore deities, spirits, saints, and beings with their epic stories and morals!
   - **Usage**: A guide for learning and storytelling across traditions. ️
   - **Themes**: Justice ⚖️, faith 🙏, hubris 🏛️, redemption ✨, cosmic order 🌌.

# ⚔️ Arthurian Legends
- **Merlin, Morgan le Fay, Arthur**: Mentor 🧙, rival 🧙‍♀️, son 👑.
- **Relation**: Family tests loyalty 🤝.
- **Lesson**: Honor 🎖️ vs. betrayal 🗡️.
"""
|
351 |
|
352 |
with gr.Blocks(theme=gr.themes.Soft(), title="Omni-Model Document Generator") as demo:
|
353 |
+
gr.Markdown("# 🧠 Omni-Model Document Generator")
|
354 |
+
gr.Markdown("Provide a prompt, or upload a file (MD, PDF, Image, Audio). A virtual AI will process it, and you can generate documents from the result.")
|
355 |
|
356 |
with gr.Row():
|
357 |
with gr.Column(scale=1):
|
358 |
+
with gr.Tabs():
|
359 |
+
with gr.TabItem("💬 Text"):
|
360 |
+
text_prompt = gr.Textbox(label="Prompt", lines=5, placeholder="Ask a question or provide instructions...")
|
361 |
+
with gr.TabItem("🖼️ Image"):
|
362 |
+
image_prompt = gr.Textbox(label="Image Prompt", lines=2, placeholder="e.g., Describe this picture")
|
363 |
+
image_file = gr.File(label="Upload Image", file_types=["image"])
|
364 |
+
with gr.TabItem("🎤 Audio"):
|
365 |
+
audio_prompt = gr.Textbox(label="Audio Prompt", lines=2, placeholder="e.g., Summarize this audio")
|
366 |
+
audio_file = gr.File(label="Upload Audio", file_types=[".wav", ".mp3"])
|
367 |
+
with gr.TabItem("📄 Document"):
|
368 |
+
doc_prompt = gr.Textbox(label="Document Prompt", lines=2, placeholder="e.g., Extract text from this PDF")
|
369 |
+
doc_file = gr.File(label="Upload MD or PDF", file_types=[".md", ".pdf"])
|
370 |
|
371 |
gr.Markdown("### 📄 Output Settings")
|
372 |
output_formats = gr.CheckboxGroup(choices=["PDF", "DOCX", "XLSX"], label="Select Output Formats", value=["PDF"])
|
373 |
|
374 |
+
with gr.Accordion("PDF Customization", open=False):
|
375 |
num_columns_slider = gr.Slider(label="Text Columns", minimum=1, maximum=4, step=1, value=1)
|
376 |
page_w_mult_slider = gr.Slider(label="Page Width Multiplier", minimum=1, maximum=5, step=1, value=1)
|
377 |
page_h_mult_slider = gr.Slider(label="Page Height Multiplier", minimum=1, maximum=2, step=1, value=1)
|
|
|
383 |
with gr.Column(scale=2):
|
384 |
gr.Markdown("### 🤖 AI Response (Source for Documents)")
|
385 |
ai_response_output = gr.Markdown(label="AI Generated Content")
|
386 |
+
gr.Markdown("### 🗂️ Generated Files")
|
387 |
+
file_explorer_output = gr.HTML(label="File Explorer & Gallery")
|
388 |
+
|
389 |
+
def master_process(p1, p2, p3, p4, f1, f2, f3, f4, *args):
    """🔀 Routes the active tab's input to generate_outputs_api.

    Args:
        p1..p4: prompt strings from the Text/Image/Audio/Document tabs.
        f1..f4: the corresponding "file" inputs. NOTE: the click wiring
            passes ``text_prompt`` for BOTH p1 and f1, so f1 is a plain
            string, never a file object.
        *args: the output settings forwarded verbatim to the API
            (formats, layouts, fonts, columns, page multipliers).

    Returns:
        Whatever generate_outputs_api returns (markdown + explorer HTML).

    Raises:
        gr.Error: when no tab has any input.
    """
    # BUGFIX: the original checked `if f1:` first and passed the text
    # prompt string into generate_outputs_api as a file, which crashed
    # downstream on `Path(f.name)`. Real file routing starts at f2.
    if f2: return generate_outputs_api([f2], p2 or "Describe this image", *args)
    if f3: return generate_outputs_api([f3], p3 or "Summarize this audio", *args)
    if f4: return generate_outputs_api([f4], p4 or "Process this document", *args)
    if p1: return generate_outputs_api(None, p1, *args)
    raise gr.Error("Please provide an input in one of the tabs.")
|
397 |
+
|
398 |
+
generate_btn.click(fn=master_process,
|
399 |
+
inputs=[text_prompt, image_prompt, audio_prompt, doc_prompt, text_prompt, image_file, audio_file, doc_file, output_formats, selected_layouts, selected_fonts, num_columns_slider, page_w_mult_slider, page_h_mult_slider],
|
400 |
+
outputs=[ai_response_output, file_explorer_output])
|
401 |
|
402 |
if __name__ == "__main__":
|
403 |
+
demo.launch(share=True)
|