ihansel committed on
Commit
6b52f14
·
verified ·
1 Parent(s): c129176

Add 2 files

Browse files
Files changed (2) hide show
  1. README.md +6 -4
  2. index.html +948 -19
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Documentconversion
3
- emoji: 🐢
4
  colorFrom: purple
5
- colorTo: purple
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: documentconversion
3
+ emoji: 🐳
4
  colorFrom: purple
5
+ colorTo: pink
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,948 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <meta name="theme-color" content="#000000">
7
+ <meta name="description" content="Convert documents to clean Markdown format">
8
+ <title>Basic Document Converter | PDF, EPUB, DOCX & PPTX to Markdown</title>
9
+ <link rel="manifest" href="/manifest.webmanifest">
10
+ <script src="https://cdn.tailwindcss.com"></script>
11
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.min.js"></script>
12
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js"></script>
13
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/jszip.min.js"></script>
14
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.0/mammoth.browser.min.js"></script>
15
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/pptx2md.min.js"></script>
16
+ <script src="https://kit.fontawesome.com/a076d05399.js" crossorigin="anonymous"></script>
17
+ <style>
18
+ @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&display=swap');
19
+
20
+ body {
21
+ font-family: 'Space Grotesk', sans-serif;
22
+ background-color: #F5F5F5;
23
+ }
24
+
25
+ .neo-border {
26
+ border: 3px solid #000;
27
+ box-shadow: 8px 8px 0 #000;
28
+ }
29
+
30
+ .neo-border-thick {
31
+ border: 4px solid #000;
32
+ }
33
+
34
+ .neo-border-thin {
35
+ border: 2px solid #000;
36
+ }
37
+
38
+ .neo-tab-active {
39
+ border-bottom: 4px solid #000;
40
+ font-weight: 700;
41
+ }
42
+
43
+ .neo-progress {
44
+ background-color: #E0E0E0;
45
+ border: 2px solid #000;
46
+ }
47
+
48
+ .neo-progress-bar {
49
+ background-color: #000;
50
+ }
51
+
52
+ .dropzone {
53
+ border: 3px dashed #000;
54
+ transition: all 0.2s;
55
+ }
56
+
57
+ .dropzone.active {
58
+ background-color: #FFF0F0;
59
+ }
60
+
61
+ .document-preview-container {
62
+ display: grid;
63
+ grid-template-columns: 1fr;
64
+ gap: 1rem;
65
+ height: 500px;
66
+ }
67
+
68
+ .document-preview {
69
+ border: 3px solid #000;
70
+ background-color: white;
71
+ overflow-y: auto;
72
+ height: 100%;
73
+ }
74
+
75
+ .page-canvas {
76
+ border: 2px solid #000;
77
+ margin-bottom: 1rem;
78
+ max-width: 100%;
79
+ }
80
+
81
+ .neo-btn {
82
+ border: 3px solid #000;
83
+ font-weight: 700;
84
+ letter-spacing: -0.5px;
85
+ transition: all 0.2s;
86
+ }
87
+
88
+ .neo-btn:hover {
89
+ transform: translate(-2px, -2px);
90
+ box-shadow: 4px 4px 0 #000;
91
+ }
92
+
93
+ .neo-btn:active {
94
+ transform: translate(0, 0);
95
+ box-shadow: none;
96
+ }
97
+
98
+ .neo-btn-primary {
99
+ background-color: #000;
100
+ color: white;
101
+ }
102
+
103
+ .neo-btn-secondary {
104
+ background-color: white;
105
+ color: black;
106
+ }
107
+
108
+ .neo-checkbox {
109
+ -webkit-appearance: none;
110
+ -moz-appearance: none;
111
+ appearance: none;
112
+ width: 20px;
113
+ height: 20px;
114
+ border: 3px solid #000;
115
+ margin-right: 8px;
116
+ position: relative;
117
+ top: 4px;
118
+ }
119
+
120
+ .neo-checkbox:checked {
121
+ background-color: #000;
122
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='white' viewBox='0 0 16 16'%3E%3Cpath d='M12.736 3.97a.733.733 0 0 1 1.047 0c.286.289.29.756.01 1.05L7.88 12.01a.733.733 0 0 1-1.065.02L3.217 8.384a.757.757 0 0 1 0-1.06.733.733 0 0 1 1.047 0l3.052 3.093 5.4-6.425a.247.247 0 0 1 .02-.022Z'/%3E%3C/svg%3E");
123
+ background-repeat: no-repeat;
124
+ background-position: center;
125
+ }
126
+
127
+ #markdownOutput {
128
+ font-family: 'Space Mono', monospace;
129
+ white-space: pre-wrap;
130
+ background-color: white;
131
+ border: 3px solid #000;
132
+ padding: 1rem;
133
+ height: 100%;
134
+ overflow-y: auto;
135
+ }
136
+
137
+ #installBtn {
138
+ position: fixed;
139
+ bottom: 1rem;
140
+ right: 1rem;
141
+ z-index: 100;
142
+ }
143
+
144
+ .preview-slide {
145
+ width: 100%;
146
+ background-color: white;
147
+ padding: 1rem;
148
+ border: 2px solid #000;
149
+ margin-bottom: 1rem;
150
+ }
151
+
152
+ .file-info {
153
+ display: flex;
154
+ align-items: center;
155
+ gap: 0.5rem;
156
+ margin-bottom: 1rem;
157
+ }
158
+
159
+ .file-icon {
160
+ font-size: 1.5rem;
161
+ }
162
+
163
+ @media (max-width: 768px) {
164
+ .container {
165
+ padding: 1rem;
166
+ }
167
+
168
+ .neo-border {
169
+ box-shadow: 4px 4px 0 #000;
170
+ }
171
+
172
+ .document-preview-container {
173
+ grid-template-columns: 1fr;
174
+ height: auto;
175
+ }
176
+
177
+ .document-preview {
178
+ height: 300px;
179
+ }
180
+ }
181
+ </style>
182
+ </head>
183
+ <body class="min-h-screen">
184
+ <div class="container mx-auto px-4 py-12 max-w-6xl">
185
+ <div class="text-center mb-12">
186
+ <h1 class="text-4xl font-bold mb-4 tracking-tight">BASIC DOCUMENT CONVERTER</h1>
187
+ <p class="text-xl">TRANSFORM PDF, EPUB, DOCX & PPTX TO CLEAN MARKDOWN</p>
188
+ </div>
189
+
190
+ <div class="neo-border bg-white mb-8">
191
+ <div class="p-8">
192
+ <div class="grid grid-cols-1 lg:grid-cols-2 gap-8">
193
+ <!-- Upload Section -->
194
+ <div>
195
+ <div id="dropzone" class="dropzone rounded-none p-12 text-center cursor-pointer mb-8">
196
+ <div id="uploadContent" class="flex flex-col items-center justify-center">
197
+ <i class="fas fa-file-upload text-5xl mb-4"></i>
198
+ <h3 class="text-xl font-bold mb-2">DRAG & DROP ANY DOCUMENT HERE</h3>
199
+ <p class="mb-6">SUPPORTS PDF, EPUB, DOCX & PPTX</p>
200
+ <input type="file" id="fileInput" accept=".pdf,.epub,.docx,.pptx" class="hidden">
201
+ <button id="browseBtn" class="neo-btn neo-btn-primary px-6 py-3">
202
+ SELECT DOCUMENT
203
+ </button>
204
+ </div>
205
+ </div>
206
+
207
+ <!-- File Info -->
208
+ <div id="fileInfoContainer" class="neo-border-thin p-4 mb-4 bg-white hidden">
209
+ <div class="file-info">
210
+ <i id="fileIcon" class="file-icon"></i>
211
+ <div>
212
+ <h3 id="fileName" class="font-bold"></h3>
213
+ <p id="fileType" class="text-sm"></p>
214
+ </div>
215
+ </div>
216
+ </div>
217
+
218
+ <!-- Options -->
219
+ <div class="neo-border-thin p-6 mb-8 bg-white">
220
+ <h3 class="font-bold text-lg mb-4">CONVERSION OPTIONS</h3>
221
+ <div class="space-y-4">
222
+ <div class="flex items-start">
223
+ <input type="checkbox" id="preserveLayout" class="neo-checkbox" checked>
224
+ <label for="preserveLayout" class="text-base">PRESERVE LAYOUT STRUCTURE</label>
225
+ </div>
226
+ <div class="flex items-start">
227
+ <input type="checkbox" id="detectHeadings" class="neo-checkbox" checked>
228
+ <label for="detectHeadings" class="text-base">AUTO-DETECT HEADINGS</label>
229
+ </div>
230
+ <div class="flex items-start">
231
+ <input type="checkbox" id="includeMetadata" class="neo-checkbox" checked>
232
+ <label for="includeMetadata" class="text-base">INCLUDE DOCUMENT METADATA</label>
233
+ </div>
234
+ </div>
235
+ </div>
236
+
237
+ <!-- Progress -->
238
+ <div id="progressContainer" class="hidden">
239
+ <div class="flex justify-between mb-2">
240
+ <span class="font-bold">CONVERSION PROGRESS</span>
241
+ <span id="progressPercent" class="font-bold">0%</span>
242
+ </div>
243
+ <div class="neo-progress w-full h-3 mb-2">
244
+ <div id="progressBar" class="neo-progress-bar h-full" style="width: 0%"></div>
245
+ </div>
246
+ <p id="progressText" class="font-medium">PROCESSING DOCUMENT...</p>
247
+ </div>
248
+ </div>
249
+
250
+ <!-- Preview Section -->
251
+ <div>
252
+ <div class="flex justify-between items-center mb-4">
253
+ <h3 class="font-bold text-lg">MARKDOWN OUTPUT</h3>
254
+ <div class="flex space-x-3">
255
+ <button id="copyBtn" class="neo-btn neo-btn-secondary px-4 py-2 text-sm hidden">
256
+ <i class="fas fa-copy mr-1"></i> COPY
257
+ </button>
258
+ <button id="downloadBtn" class="neo-btn neo-btn-primary px-4 py-2 text-sm hidden">
259
+ <i class="fas fa-download mr-1"></i> DOWNLOAD
260
+ </button>
261
+ </div>
262
+ </div>
263
+
264
+ <div class="document-preview-container">
265
+ <div id="documentPreview" class="document-preview p-4">
266
+ <div id="previewContent" class="flex flex-col items-center"></div>
267
+ </div>
268
+ <div id="markdownOutput" class="document-preview p-4"></div>
269
+ </div>
270
+ </div>
271
+ </div>
272
+ </div>
273
+ </div>
274
+
275
+ <div class="text-center font-medium">
276
+ <p>THIS TOOL WORKS ENTIRELY IN YOUR BROWSER. YOUR FILES ARE NEVER UPLOADED TO ANY SERVER.</p>
277
+ </div>
278
+ </div>
279
+
280
+ <!-- Install button (hidden by default) -->
281
+ <button id="installBtn" class="neo-btn neo-btn-primary px-6 py-3 hidden">
282
+ <i class="fas fa-download mr-2"></i> INSTALL APP
283
+ </button>
284
+
285
+ <!-- Service Worker Registration -->
286
+ <script>
287
// Point PDF.js at its worker script (same 2.12.313 release as the pdf.min.js loaded in <head>).
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.worker.min.js';

// ---- PWA installation ----
// `deferredPrompt` holds the intercepted `beforeinstallprompt` event so the
// install dialog can be opened later from the page's own button.
let deferredPrompt;
const installBtn = document.getElementById('installBtn');

/** Hides the install button and forgets the stored prompt event. */
function resetInstallPrompt() {
    installBtn.classList.add('hidden');
    deferredPrompt = null;
}

window.addEventListener('beforeinstallprompt', (e) => {
    // Suppress the browser's own prompt and offer the custom button instead.
    e.preventDefault();
    deferredPrompt = e;
    installBtn.classList.remove('hidden');
});

installBtn.addEventListener('click', async () => {
    if (!deferredPrompt) return;
    try {
        deferredPrompt.prompt();
        // Wait for the user's decision; the outcome itself is not used.
        await deferredPrompt.userChoice;
    } finally {
        // Reset even when prompting throws, so a stale button is never left behind.
        resetInstallPrompt();
    }
});

window.addEventListener('appinstalled', resetInstallPrompt);

// Check if the app is running as a PWA
if (window.matchMedia('(display-mode: standalone)').matches || window.navigator.standalone) {
    console.log('Running as PWA');
}
317
+
318
// Register the service worker once the page has finished loading.
// Success and failure are both only logged; the page works without it.
if ('serviceWorker' in navigator) {
    window.addEventListener('load', async () => {
        try {
            await navigator.serviceWorker.register('/sw.js');
            console.log('ServiceWorker registration successful');
        } catch (err) {
            console.log('ServiceWorker registration failed: ', err);
        }
    });
}
328
+
329
// Generate the web app manifest at runtime and serve it from a blob: URL.
//
// A blob: URL cannot act as the base for relative references, so every URL
// member of the manifest is made absolute against the current page first.
const toAbsoluteUrl = (path) => new URL(path, window.location.href).href;

const manifest = {
    "name": "Universal Document Converter",
    "short_name": "DocConvert",
    "description": "Convert documents to clean Markdown format",
    "start_url": toAbsoluteUrl("/"),
    "display": "standalone",
    "background_color": "#F5F5F5",
    "theme_color": "#000000",
    "icons": [
        {
            "src": toAbsoluteUrl("icon-192x192.png"),
            "sizes": "192x192",
            "type": "image/png"
        },
        {
            "src": toAbsoluteUrl("icon-512x512.png"),
            "sizes": "512x512",
            "type": "image/png"
        },
        {
            "src": toAbsoluteUrl("icon-maskable-192x192.png"),
            "sizes": "192x192",
            "type": "image/png",
            "purpose": "maskable"
        },
        {
            "src": toAbsoluteUrl("icon-maskable-512x512.png"),
            "sizes": "512x512",
            "type": "image/png",
            "purpose": "maskable"
        }
    ]
};

// Create a blob URL for the manifest
const manifestBlob = new Blob([JSON.stringify(manifest)], { type: 'application/json' });
const manifestUrl = URL.createObjectURL(manifestBlob);

// Reuse the <link rel="manifest"> that the page already declares in <head>
// instead of appending a second one: only one manifest link is honoured, so a
// second element after the static one would be ignored.
const manifestLink = document.querySelector('link[rel="manifest"]') ?? document.createElement('link');
manifestLink.rel = 'manifest';
manifestLink.href = manifestUrl;
if (!manifestLink.isConnected) {
    document.head.appendChild(manifestLink);
}
373
+
374
+ // Main application code
375
+ document.addEventListener('DOMContentLoaded', function() {
376
// ---- DOM references ----
const byId = (id) => document.getElementById(id);

// Upload controls
const fileInput = byId('fileInput');
const browseBtn = byId('browseBtn');
const dropzone = byId('dropzone');

// Output area and its action buttons
const markdownOutput = byId('markdownOutput');
const copyBtn = byId('copyBtn');
const downloadBtn = byId('downloadBtn');

// Progress indicator
const progressContainer = byId('progressContainer');
const progressBar = byId('progressBar');
const progressPercent = byId('progressPercent');
const progressText = byId('progressText');

// Document preview
const documentPreview = byId('documentPreview');
const previewContent = byId('previewContent');

// Selected-file summary
const fileInfoContainer = byId('fileInfoContainer');
const fileName = byId('fileName');
const fileType = byId('fileType');
const fileIcon = byId('fileIcon');

// ---- Conversion state (reset for every newly selected file) ----
let currentMarkdown = '';
let currentFilename = '';
let currentFileType = '';
// Preview elements collected by the individual converters.
let pdfPages = [];
let epubItems = [];
let docxPages = [];
let pptxSlides = [];
402
+
403
// ---- Drag-and-drop and file-picker wiring ----

/** Cancels the browser's default handling of a drag/drop event and stops it bubbling. */
function preventDefaults(e) {
    e.preventDefault();
    e.stopPropagation();
}

/** Marks the dropzone as an active drop target. */
function highlight() {
    dropzone.classList.add('active');
}

/** Removes the active-drop-target styling from the dropzone. */
function unhighlight() {
    dropzone.classList.remove('active');
}

/**
 * Forwards the first dropped file to handleFiles.
 * handleFiles expects an input `change` event, so the file is wrapped in an
 * object of the same shape.
 */
function handleDrop(e) {
    const file = e.dataTransfer.files[0];
    if (file) {
        handleFiles({ target: { files: [file] } });
    }
}

for (const eventName of ['dragenter', 'dragover', 'dragleave', 'drop']) {
    dropzone.addEventListener(eventName, preventDefaults, false);
}

for (const eventName of ['dragenter', 'dragover']) {
    dropzone.addEventListener(eventName, highlight, false);
}

for (const eventName of ['dragleave', 'drop']) {
    dropzone.addEventListener(eventName, unhighlight, false);
}

dropzone.addEventListener('drop', handleDrop, false);

// The real file input is hidden; the visible button opens it.
browseBtn.addEventListener('click', (e) => {
    e.preventDefault();
    fileInput.click();
});

fileInput.addEventListener('change', (e) => {
    handleFiles(e);
    // Clear the selection so that picking the same file again fires another
    // `change` event. handleFiles has already taken the File object above.
    fileInput.value = '';
});
449
+
450
// ---- Copy to clipboard ----
// The idle label is captured only when no feedback is showing. Otherwise a
// second click within the 2 s window would store "COPIED!" as the label to
// restore, and the button would never return to its idle state.
let copyIdleHtml = null;
let copyResetTimer = null;

/** Shows temporary feedback markup on the copy button, then restores its idle label. */
function showCopyFeedback(html) {
    if (copyResetTimer === null) {
        copyIdleHtml = copyBtn.innerHTML;
    } else {
        clearTimeout(copyResetTimer);
    }
    copyBtn.innerHTML = html;
    copyResetTimer = setTimeout(() => {
        copyBtn.innerHTML = copyIdleHtml;
        copyResetTimer = null;
    }, 2000);
}

copyBtn.addEventListener('click', async () => {
    try {
        await navigator.clipboard.writeText(currentMarkdown);
        showCopyFeedback('<i class="fas fa-check mr-1"></i> COPIED!');
    } catch (error) {
        // The clipboard can be unavailable (insecure context) or denied by the user.
        console.error('Copy to clipboard failed:', error);
        showCopyFeedback('<i class="fas fa-times mr-1"></i> COPY FAILED');
    }
});

// ---- Download markdown file ----
// Saves the current Markdown as "<original name>.md" through a temporary link.
downloadBtn.addEventListener('click', () => {
    const url = URL.createObjectURL(new Blob([currentMarkdown], { type: 'text/markdown' }));
    const link = document.createElement('a');
    link.href = url;
    link.download = `${currentFilename}.md`;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
});
473
+
474
/**
 * Validates the selected file, resets the conversion state and starts processing.
 *
 * The file type is decided from the extension, compared case-insensitively so
 * that names such as "REPORT.PDF" are accepted. Validation runs before any
 * state is cleared, so an unsupported file leaves the previous result intact.
 *
 * @param {{target: {files: ArrayLike<File>}}} e - Input `change` event, or an
 *   object of the same shape built by the drop handler.
 */
function handleFiles(e) {
    const file = e.target.files[0];
    if (!file) return;

    // A Map avoids accidental hits on Object.prototype keys (e.g. a file named "constructor").
    const supportedTypes = new Map([
        ['pdf', { icon: 'fas fa-file-pdf', label: 'PDF Document' }],
        ['epub', { icon: 'fas fa-book-open', label: 'EPUB eBook' }],
        ['docx', { icon: 'fas fa-file-word', label: 'Word Document' }],
        ['pptx', { icon: 'fas fa-file-powerpoint', label: 'PowerPoint Presentation' }],
    ]);

    const dotIndex = file.name.lastIndexOf('.');
    const extension = dotIndex === -1 ? '' : file.name.slice(dotIndex + 1).toLowerCase();
    const typeInfo = supportedTypes.get(extension);
    if (!typeInfo) {
        alert('Unsupported file type. Please upload a PDF, EPUB, DOCX or PPTX file.');
        return;
    }

    // Reset state
    currentFilename = file.name.replace(/\.[^/.]+$/, "") || 'converted';
    currentMarkdown = '';
    markdownOutput.textContent = '';
    previewContent.innerHTML = '';
    pdfPages = [];
    epubItems = [];
    docxPages = [];
    pptxSlides = [];

    // Record the type and show the file summary
    currentFileType = extension;
    fileIcon.className = `file-icon ${typeInfo.icon}`;
    fileType.textContent = typeInfo.label;
    fileName.textContent = file.name;
    fileInfoContainer.classList.remove('hidden');

    // Process the file
    processFile(file);
}
517
+
518
/**
 * Runs the converter that matches `currentFileType` and updates the progress UI.
 *
 * Errors from a converter are caught here, logged, and shown in the output
 * pane; nothing is rethrown.
 *
 * @param {File} file - The document to convert.
 * @returns {Promise<void>}
 */
async function processFile(file) {
    // Hide the actions left over from a previous run. They are shown again only
    // after this conversion succeeds, so they never act on empty or stale output.
    copyBtn.classList.add('hidden');
    downloadBtn.classList.add('hidden');

    try {
        // Show progress
        progressContainer.classList.remove('hidden');
        progressBar.style.width = '0%';
        progressPercent.textContent = '0%';
        progressText.textContent = 'PROCESSING DOCUMENT...';

        // Clear previous content
        markdownOutput.textContent = 'LOADING...';
        previewContent.innerHTML = '';

        // Dispatch to appropriate converter
        switch (currentFileType) {
            case 'pdf':
                await convertPdfToMarkdown(file);
                break;
            case 'epub':
                await convertEpubToMarkdown(file);
                break;
            case 'docx':
                await convertDocxToMarkdown(file);
                break;
            case 'pptx':
                await convertPptxToMarkdown(file);
                break;
            default:
                // Fail loudly instead of reporting success when nothing was converted.
                throw new Error(`Unsupported file type: ${currentFileType}`);
        }

        // Show action buttons
        copyBtn.classList.remove('hidden');
        downloadBtn.classList.remove('hidden');

        // Update progress
        progressBar.style.width = '100%';
        progressPercent.textContent = '100%';
        progressText.textContent = 'CONVERSION COMPLETE!';
    } catch (error) {
        console.error(`Error converting ${currentFileType}:`, error);
        markdownOutput.textContent = `ERROR: ${error.message}`;
        progressText.textContent = 'CONVERSION FAILED';
    }
}
561
+
562
/**
 * Converts a PDF to Markdown and renders a canvas preview of every page.
 *
 * Each page becomes a "# PAGE n" section holding the page's text items joined
 * by single spaces; sections are separated by a horizontal rule. The result is
 * stored in `currentMarkdown` and written to the output pane.
 *
 * @param {File} file - The PDF file to convert.
 * @returns {Promise<void>}
 */
async function convertPdfToMarkdown(file) {
    const pdfData = await file.arrayBuffer();
    const pdf = await pdfjsLib.getDocument(pdfData).promise;
    const pageCount = pdf.numPages;
    const sections = [];

    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber += 1) {
        const page = await pdf.getPage(pageNumber);

        // Report progress before the (slower) text extraction and rendering.
        const percent = `${Math.round((pageNumber / pageCount) * 100)}%`;
        progressBar.style.width = percent;
        progressPercent.textContent = percent;
        progressText.textContent = `PROCESSING PAGE ${pageNumber} OF ${pageCount}...`;

        // Text of the page as one line.
        const { items } = await page.getTextContent();
        const pageText = items.map((item) => item.str).join(' ');
        sections.push(`# PAGE ${pageNumber}\n\n${pageText}`);

        // Render the page to an off-DOM canvas at 80% scale for the preview.
        const viewport = page.getViewport({ scale: 0.8 });
        const canvas = document.createElement('canvas');
        const context = canvas.getContext('2d');
        canvas.height = viewport.height;
        canvas.width = viewport.width;
        canvas.className = 'page-canvas';
        await page.render({ canvasContext: context, viewport }).promise;

        pdfPages.push(canvas);
    }

    // The previews are attached only after every page has rendered.
    for (const canvas of pdfPages) {
        previewContent.appendChild(canvas);
    }

    currentMarkdown = postProcessMarkdown(sections.join('\n\n---\n\n'));
    markdownOutput.textContent = currentMarkdown;
}
620
+
621
/**
 * Decodes a percent-encoded manifest href (e.g. "chapter%201.xhtml").
 * Returns the input unchanged when it is not valid percent-encoding.
 */
function decodeEpubHref(href) {
    try {
        return decodeURIComponent(href);
    } catch {
        return href;
    }
}

/**
 * Renders the headings, paragraphs and lists of one chapter as Markdown,
 * keeping them in document order.
 *
 * Elements nested inside a list are skipped: their text is already part of the
 * enclosing top-level <li>, so emitting them again would duplicate it.
 *
 * @param {Document} contentDoc - Parsed chapter document.
 * @returns {string} Markdown for the chapter (may be empty).
 */
function epubChapterToMarkdown(contentDoc) {
    let chapter = '';

    for (const el of contentDoc.querySelectorAll('h1, h2, h3, h4, h5, h6, p, ul, ol')) {
        if (el.parentElement?.closest('ul, ol')) continue;

        if (el.tagName === 'UL' || el.tagName === 'OL') {
            const isOrdered = el.tagName === 'OL';
            const listItems = [...el.children].filter((child) => child.tagName === 'LI');
            listItems.forEach((li, index) => {
                const prefix = isOrdered ? `${index + 1}.` : '-';
                chapter += `${prefix} ${li.textContent}\n`;
            });
            chapter += '\n';
        } else if (el.tagName === 'P') {
            chapter += `${el.textContent}\n\n`;
        } else {
            // h1..h6: the digit in the tag name is the heading level.
            const level = Number(el.tagName.slice(1));
            chapter += `${'#'.repeat(level)} ${el.textContent}\n\n`;
        }
    }

    return chapter;
}

/**
 * Converts an EPUB to Markdown and builds a text preview for each spine item.
 *
 * Reads META-INF/container.xml to locate the package document, optionally
 * emits a metadata header, then converts each spine item in reading order.
 * The result is stored in `currentMarkdown` and written to the output pane.
 *
 * @param {File} file - The EPUB file to convert.
 * @returns {Promise<void>}
 * @throws {Error} When container.xml or the package document is missing.
 */
async function convertEpubToMarkdown(file) {
    const arrayBuffer = await file.arrayBuffer();
    const zip = await JSZip.loadAsync(arrayBuffer);
    const parser = new DOMParser();

    // Get container.xml to find the rootfile
    const containerEntry = zip.file('META-INF/container.xml');
    if (!containerEntry) {
        throw new Error('Invalid EPUB: META-INF/container.xml is missing');
    }
    const containerData = await containerEntry.async('text');
    const rootFilePath = containerData.match(/<rootfile[^>]*full-path="([^"]*)"/)?.[1];
    if (!rootFilePath) {
        throw new Error('Invalid EPUB: container.xml does not declare a rootfile');
    }

    // Parse the rootfile (usually content.opf)
    const rootEntry = zip.file(rootFilePath);
    if (!rootEntry) {
        throw new Error(`Invalid EPUB: package document "${rootFilePath}" is missing`);
    }
    const opfDoc = parser.parseFromString(await rootEntry.async('text'), 'application/xml');

    // Extract metadata if enabled
    let metadataHeader = '';
    if (document.getElementById('includeMetadata').checked) {
        const metadata = opfDoc.querySelector('metadata');
        if (metadata) {
            metadataHeader += '# EPUB METADATA\n\n';

            const title = metadata.querySelector('title')?.textContent;
            if (title) metadataHeader += `**TITLE:** ${title}\n\n`;

            const creator = metadata.querySelector('creator')?.textContent;
            if (creator) metadataHeader += `**AUTHOR:** ${creator}\n\n`;

            const date = metadata.querySelector('date')?.textContent;
            if (date) metadataHeader += `**DATE:** ${date}\n\n`;

            const publisher = metadata.querySelector('publisher')?.textContent;
            if (publisher) metadataHeader += `**PUBLISHER:** ${publisher}\n\n`;

            metadataHeader += '---\n\n';
        }
    }

    // Manifest: item id -> href, as written in the package document.
    const manifest = new Map();
    opfDoc.querySelectorAll('manifest item').forEach((item) => {
        manifest.set(item.getAttribute('id'), item.getAttribute('href'));
    });

    // Manifest hrefs are relative to the folder that holds the package document.
    const contentDir = rootFilePath.split('/').slice(0, -1).join('/');
    const resolvePath = (href) => (contentDir ? `${contentDir}/${href}` : href);

    // Spine: the reading order.
    const spineItems = [...opfDoc.querySelectorAll('spine itemref')];
    const totalItems = spineItems.length;
    const chapters = [];

    for (const [index, item] of spineItems.entries()) {
        // Progress counts every spine entry, including ones skipped below, so it
        // always ends at 100%.
        const position = index + 1;
        const progress = Math.round((position / totalItems) * 100);
        progressBar.style.width = `${progress}%`;
        progressPercent.textContent = `${progress}%`;
        progressText.textContent = `PROCESSING ITEM ${position} OF ${totalItems}...`;

        const href = manifest.get(item.getAttribute('idref'));
        if (!href) continue;

        // Try the href as written first, then its percent-decoded form.
        const contentFile = zip.file(resolvePath(href)) ?? zip.file(resolvePath(decodeEpubHref(href)));
        if (!contentFile) continue;

        const content = await contentFile.async('text');
        const contentDoc = parser.parseFromString(content, 'text/html');

        // Remove scripts and styles
        contentDoc.querySelectorAll('script, style').forEach((el) => el.remove());

        const itemContent = epubChapterToMarkdown(contentDoc);
        chapters.push(itemContent);

        // Create preview element (first 500 characters of the chapter)
        const previewDiv = document.createElement('div');
        previewDiv.className = 'preview-slide';

        const previewTitle = document.createElement('h4');
        previewTitle.className = 'font-bold mb-2';
        previewTitle.textContent = href;

        const previewContentDiv = document.createElement('div');
        previewContentDiv.className = 'text-sm';
        previewContentDiv.textContent = itemContent.substring(0, 500) + (itemContent.length > 500 ? '...' : '');

        previewDiv.appendChild(previewTitle);
        previewDiv.appendChild(previewContentDiv);
        epubItems.push(previewDiv);
    }

    // Display preview
    epubItems.forEach((item) => {
        previewContent.appendChild(item);
    });

    // Joining puts a separator only between converted chapters, so skipped
    // spine items cannot leave a dangling rule at the end.
    const markdownContent = metadataHeader + chapters.join('\n---\n\n');

    // Post-process the markdown
    currentMarkdown = postProcessMarkdown(markdownContent);
    markdownOutput.textContent = currentMarkdown;
}
756
+
757
/**
 * Converts a DOCX file to Markdown using mammoth's raw-text extraction.
 *
 * The preview is split into simulated "pages" of 2000 characters each, and
 * each card shows the first 500 characters of its page. The full text is
 * post-processed, stored in `currentMarkdown` and written to the output pane.
 *
 * @param {File} file - The DOCX file to convert.
 * @returns {Promise<void>}
 */
async function convertDocxToMarkdown(file) {
    const CHARS_PER_PAGE = 2000; // Approximate characters per page
    const PREVIEW_CHARS = 500;

    const buffer = await file.arrayBuffer();
    const extraction = await mammoth.extractRawText({ arrayBuffer: buffer });
    const rawText = extraction.value;

    const pageCount = Math.ceil(rawText.length / CHARS_PER_PAGE);

    for (let pageIndex = 0; pageIndex < pageCount; pageIndex += 1) {
        const offset = pageIndex * CHARS_PER_PAGE;
        const pageText = rawText.substring(offset, offset + CHARS_PER_PAGE);

        const title = document.createElement('h4');
        title.className = 'font-bold mb-2';
        title.textContent = `Page ${pageIndex + 1}`;

        const excerpt = document.createElement('div');
        excerpt.className = 'text-sm';
        excerpt.textContent = pageText.length > PREVIEW_CHARS
            ? `${pageText.substring(0, PREVIEW_CHARS)}...`
            : pageText.substring(0, PREVIEW_CHARS);

        const card = document.createElement('div');
        card.className = 'preview-slide';
        card.appendChild(title);
        card.appendChild(excerpt);
        docxPages.push(card);

        // Progress is based on the zero-based index, so it stays below 100%
        // here; the caller sets 100% once conversion completes.
        const percent = `${Math.round((pageIndex / pageCount) * 100)}%`;
        progressBar.style.width = percent;
        progressPercent.textContent = percent;
        progressText.textContent = `PROCESSING DOCUMENT...`;
    }

    for (const card of docxPages) {
        previewContent.appendChild(card);
    }

    currentMarkdown = postProcessMarkdown(rawText);
    markdownOutput.textContent = currentMarkdown;
}
807
+
808
/**
 * Converts a PPTX file to Markdown through the global `pptx2md` function and
 * builds a short text preview card for every slide.
 *
 * NOTE(review): the shape of the pptx2md result (`markdown`, `slides[].title`,
 * `.notes`, `.bodies[].text`) is taken from how this function reads it —
 * confirm against the library actually loaded in <head>.
 *
 * @param {File} file - The PPTX file to convert.
 * @returns {Promise<void>}
 */
async function convertPptxToMarkdown(file) {
    const PREVIEW_CHARS = 500;

    const buffer = await file.arrayBuffer();
    const conversion = await pptx2md(buffer);
    const slideMarkdown = conversion.markdown;

    conversion.slides.forEach((slide, index) => {
        // Assemble a simplified text summary: title, notes, then body texts.
        const parts = [];
        if (slide.title) parts.push(`# ${slide.title}\n\n`);
        if (slide.notes) parts.push(`${slide.notes}\n\n`);
        if (slide.bodies && slide.bodies.length > 0) {
            parts.push(slide.bodies.map((body) => body.text).join('\n\n'));
        }
        const summary = parts.join('');

        const heading = document.createElement('h4');
        heading.className = 'font-bold mb-2';
        heading.textContent = `Slide ${index + 1}`;

        const excerpt = document.createElement('div');
        excerpt.className = 'text-sm';
        excerpt.textContent = summary.substring(0, PREVIEW_CHARS) + (summary.length > PREVIEW_CHARS ? '...' : '');

        const card = document.createElement('div');
        card.className = 'preview-slide';
        card.appendChild(heading);
        card.appendChild(excerpt);
        pptxSlides.push(card);

        // Progress uses the zero-based index; the caller sets 100% at the end.
        const percent = `${Math.round((index / conversion.slides.length) * 100)}%`;
        progressBar.style.width = percent;
        progressPercent.textContent = percent;
        progressText.textContent = `PROCESSING SLIDE ${index + 1} OF ${conversion.slides.length}...`;
    });

    for (const card of pptxSlides) {
        previewContent.appendChild(card);
    }

    currentMarkdown = postProcessMarkdown(slideMarkdown);
    markdownOutput.textContent = currentMarkdown;
}
859
+
860
/**
 * Applies the optional clean-up passes selected in the "Conversion options" box.
 *
 * - AUTO-DETECT HEADINGS: a short line (11-80 characters) that starts with a
 *   capital letter, follows a blank line and is directly followed by a newline
 *   is prefixed with "## ". This is a text-only heuristic; no font information
 *   is available here.
 * - PRESERVE LAYOUT STRUCTURE: runs of three or more newlines collapse to one
 *   blank line.
 *
 * @param {string} text - Raw Markdown produced by a converter.
 * @returns {string} The post-processed Markdown.
 */
function postProcessMarkdown(text) {
    let result = text;

    if (document.getElementById('detectHeadings').checked) {
        // Group 1 is the blank line and group 2 the heading text, so "## " is
        // inserted directly in front of the text rather than in front of the
        // blank line. The trailing newline is only looked at, not consumed, so
        // a heading that follows immediately can still match. The upper length
        // bound keeps whole paragraphs from being promoted to headings.
        result = result.replace(/(\n\n)([A-Z][^\n]{10,79})(?=\n)/g, '$1## $2');
    }

    if (document.getElementById('preserveLayout').checked) {
        // Convert multiple newlines to markdown paragraphs
        result = result.replace(/\n{3,}/g, '\n\n');
    }

    return result;
}
878
+ });
879
+ </script>
880
+
881
+ <!-- Inline Service Worker -->
882
+ <script>
883
+ // Builds the service worker source as a string (precache a fixed asset list on install, answer fetches cache-first with network fallback, delete caches other than CACHE_NAME on activate) and tries to register it from a blob: URL. NOTE(review): ServiceWorkerContainer.register() is specified to reject script URLs whose scheme is not http(s), so this registration is expected to fail and only log the error below — confirm, then move swCode into a real same-origin /sw.js, which the earlier registration in this page already points at.
884
+ if ('serviceWorker' in navigator) {
885
+ const swCode = `
886
+ const CACHE_NAME = 'doc-converter-v3';
887
+ const ASSETS_TO_CACHE = [
888
+ '/',
889
+ '/index.html',
890
+ 'https://cdn.tailwindcss.com',
891
+ 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.min.js',
892
+ 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.12.313/pdf.worker.min.js',
893
+ 'https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js',
894
+ 'https://cdn.jsdelivr.net/npm/[email protected]/dist/jszip.min.js',
895
+ 'https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.0/mammoth.browser.min.js',
896
+ 'https://cdn.jsdelivr.net/npm/[email protected]/dist/pptx2md.min.js',
897
+ 'https://kit.fontawesome.com/a076d05399.js',
898
+ 'https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&display=swap'
899
+ ];
900
+
901
+ self.addEventListener('install', (event) => {
902
+ event.waitUntil(
903
+ caches.open(CACHE_NAME)
904
+ .then((cache) => {
905
+ return cache.addAll(ASSETS_TO_CACHE);
906
+ })
907
+ );
908
+ });
909
+
910
+ self.addEventListener('fetch', (event) => {
911
+ event.respondWith(
912
+ caches.match(event.request)
913
+ .then((response) => {
914
+ return response || fetch(event.request);
915
+ })
916
+ );
917
+ });
918
+
919
+ self.addEventListener('activate', (event) => {
920
+ const cacheWhitelist = [CACHE_NAME];
921
+ event.waitUntil(
922
+ caches.keys().then((cacheNames) => {
923
+ return Promise.all(
924
+ cacheNames.map((cacheName) => {
925
+ if (cacheWhitelist.indexOf(cacheName) === -1) {
926
+ return caches.delete(cacheName);
927
+ }
928
+ })
929
+ );
930
+ })
931
+ );
932
+ });
933
+ `;
934
+
935
+ const blob = new Blob([swCode], { type: 'application/javascript' });
936
+ const swUrl = URL.createObjectURL(blob);
937
+
938
+ navigator.serviceWorker.register(swUrl)
939
+ .then(registration => {
940
+ console.log('Service Worker registered with scope:', registration.scope);
941
+ })
942
+ .catch(error => {
943
+ console.log('Service Worker registration failed:', error);
944
+ });
945
+ }
946
+ </script>
947
+ <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - 🧬 <a href="https://enzostvs-deepsite.hf.space?remix=ihansel/documentconversion" style="color: #fff;text-decoration: underline;" target="_blank" >Remix</a></p></body>
948
+ </html>