File size: 8,987 Bytes
0b887a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80e430b
 
0b887a8
 
 
 
 
80e430b
0b887a8
 
 
80e430b
 
 
 
 
 
 
 
 
 
 
0b887a8
80e430b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b887a8
 
80e430b
 
 
 
 
0b887a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80e430b
abd0964
0b887a8
abd0964
0b887a8
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import gradio as gr
from openai import OpenAI
import base64
from PIL import Image
import io
import fitz  # PyMuPDF
import tempfile
import os

# --- HELPER FUNCTIONS ---
def convert_pdf_to_images(pdf_file, zoom=1.0):
    """Render every page of a PDF into a PIL image.

    Args:
        pdf_file: Raw bytes of the PDF document.
        zoom: Scale factor applied to both axes when rasterizing each page.
            The default of 1.0 applies an identity matrix, so the output size
            is the same as rendering without any matrix.

    Returns:
        A list of RGB PIL images, one per page, in page order.

    Raises:
        gr.Error: If the document cannot be opened or a page cannot be rendered.
    """
    try:
        # Open straight from memory: no temporary file is written, so nothing
        # is left behind on disk when rendering fails part-way through. The
        # context manager closes the document on every exit path.
        with fitz.open(stream=pdf_file, filetype="pdf") as pdf_document:
            scale = fitz.Matrix(zoom, zoom)
            images = []
            for page in pdf_document:
                # alpha=False keeps the buffer at 3 bytes per pixel, which is
                # what the "RGB" mode passed to frombytes() expects.
                pix = page.get_pixmap(matrix=scale, alpha=False)
                images.append(
                    Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                )
    except Exception as e:
        raise gr.Error(f"Error converting PDF: {e}") from e
    return images

def image_to_base64(image):
    """Encode an image as PNG and return the PNG bytes as a base64 text string.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=...)`` method.

    Returns:
        The base64 representation of the PNG data, decoded as UTF-8 text.
    """
    png_stream = io.BytesIO()
    try:
        image.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    finally:
        # Release the in-memory buffer whether or not saving succeeded.
        png_stream.close()

    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")

def generate_summary(extracted_texts, api_key, model="opengvlab/internvl3-14b:free", max_tokens=2048):
    """Generate a comprehensive summary of all extracted page texts.

    Args:
        extracted_texts: The per-page extraction results, already joined into
            one string; it is inserted verbatim into the prompt.
        api_key: OpenRouter API key used to authenticate the request.
        model: Identifier of the OpenRouter model to query.
        max_tokens: Upper bound on the number of tokens in the reply.

    Returns:
        The summary text returned by the model.

    Raises:
        gr.Error: If the request fails or the model returns no content.
    """
    summary_prompt = f"""
        You are an expert document analyst. Below are the extracted contents from multiple pages of a document.
        Please provide a comprehensive, detailed summary that:
        1. Organizes all key information logically
        2. Identifies relationships between data points
        3. Highlights important figures, dates, names
        4. Presents the information in a clear, structured format
        
        Extracted contents from pages:
        {extracted_texts}
        
        Comprehensive Summary:
        """

    try:
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key
        )
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are Dalton, an expert in analyzing and summarizing document contents."},
                {"role": "user", "content": summary_prompt}
            ],
            max_tokens=max_tokens
        )
        summary = response.choices[0].message.content
    except Exception as e:
        # Chain the cause so the original traceback stays visible in logs.
        raise gr.Error(f"Error generating summary: {e}") from e

    # A missing/empty reply would otherwise be rendered as the text "None".
    if not summary:
        raise gr.Error("Error generating summary: the model returned an empty response")
    return summary

def _load_document_images(file_path):
    """Load the file at *file_path* as a list of PIL images (PDF pages or one image)."""
    if os.path.splitext(file_path)[1].lower() == '.pdf':
        with open(file_path, "rb") as f:
            return convert_pdf_to_images(f.read())

    try:
        # copy() reads the pixel data into memory, so the file handle opened
        # by Image.open() can be closed immediately instead of leaking.
        with Image.open(file_path) as source:
            return [source.copy()]
    except Exception as e:
        raise gr.Error(f"Error opening image: {e}") from e


def _analyze_page(client, user_prompt, image):
    """Send one page image together with the user's prompt to the vision model."""
    image_base64 = image_to_base64(image)
    response = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[
            {"role": "system", "content": "You are Dalton, an expert in understanding images that can analyze images and provide detailed descriptions."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {
                    "url": f"data:image/png;base64,{image_base64}"
                }}
            ]}
        ],
        max_tokens=1024
    )
    return response.choices[0].message.content


def analyze_document(api_key, user_prompt, uploaded_file):
    """Analyze an uploaded PDF or image page by page and build a Markdown report.

    Args:
        api_key: OpenRouter API key entered by the user.
        user_prompt: Instruction sent to the model alongside each page image.
        uploaded_file: Value of the file upload component; either an object
            with a ``name`` attribute holding the file path, or the path itself.

    Returns:
        A Markdown string containing one section per page, a summary section
        when more than one page was analyzed, and a footer naming the file.

    Raises:
        gr.Error: If the key or file is missing, the file cannot be loaded,
            it contains no pages, or a model request fails.
    """
    if not api_key:
        raise gr.Error("Please enter your OpenRouter API key")

    if uploaded_file is None:
        raise gr.Error("Please upload a document")

    # Accept both a file-like wrapper (with .name) and a plain path string.
    file_path = getattr(uploaded_file, "name", uploaded_file)

    images_to_analyze = _load_document_images(file_path)
    if not images_to_analyze:
        raise gr.Error("The uploaded document contains no pages to analyze")

    # The client does not depend on the page, so build it once for all pages.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key
    )

    all_results = []
    extracted_texts = []

    for idx, image in enumerate(images_to_analyze, 1):
        try:
            result = _analyze_page(client, user_prompt, image)
        except Exception as e:
            raise gr.Error(f"Error analyzing page {idx}: {e}") from e
        extracted_texts.append(f"### Page {idx}\n{result}\n")
        all_results.append(f"## 📄 Page {idx} Results\n{result}\n---\n")

    markdown_output = "\n".join(all_results)

    # A summary only adds value when there is more than one page to combine.
    if len(extracted_texts) > 1:
        summary = generate_summary("\n".join(extracted_texts), api_key)
        markdown_output += f"\n## 📝 Comprehensive Summary\n{summary}\n"

        # NOTE(review): the bullets below are static placeholders, not values
        # extracted from the document - confirm whether they should be filled
        # in from the model output or removed.
        markdown_output += "\n## 🔍 Key Data Extracted\n"
        markdown_output += "- **Important Figures**: [Extracted values]\n"
        markdown_output += "- **Critical Dates**: [Extracted dates]\n"
        markdown_output += "- **Main Entities**: [Identified names/companies]\n"
        markdown_output += "- **Action Items**: [Key tasks identified]\n"

    # Show only the file name; the full path is a server-side temp location.
    markdown_output += f"\n---\n*Document processed: {os.path.basename(file_path)}*"

    return markdown_output

# Custom CSS for the dark theme with green text.
# The :root block declares the colour variables; the rules that follow apply
# them to the page body, to elements carrying the "markdown-output" class
# (headings, links, code, lists) and to buttons.
custom_css = """
:root {
    --primary: #00ff00;
    --primary-50: #00ff0033;
    --primary-100: #00ff0066;
    --primary-200: #00ff0099;
    --primary-300: #00ff00cc;
    --secondary: #00cc00;
    --secondary-50: #00cc0033;
    --secondary-100: #00cc0066;
    --secondary-200: #00cc0099;
    --secondary-300: #00cc00cc;
    --color-background-primary: #000000;
    --color-background-secondary: #111111;
    --color-background-tertiary: #222222;
    --text-color: #00ff00;
    --block-background-fill: #111111;
    --block-border-color: #00aa00;
    --block-label-text-color: #00ff00;
    --block-title-text-color: #00ff00;
    --input-background-fill: #111111;
    --input-border-color: #00aa00;
    --input-text-color: #00ff00;
}

body {
    background-color: var(--color-background-primary) !important;
    color: var(--text-color) !important;
}

.markdown-output {
    padding: 20px;
    border-radius: 8px;
    background: var(--color-background-secondary);
    border: 1px solid var(--block-border-color);
    max-height: 600px;
    overflow-y: auto;
    color: var(--text-color) !important;
}

.markdown-output h1, 
.markdown-output h2, 
.markdown-output h3 {
    color: var(--primary) !important;
    border-bottom: 1px solid var(--primary-300);
}

.markdown-output a {
    color: var(--secondary) !important;
}

.markdown-output code {
    background-color: var(--color-background-tertiary);
    color: var(--secondary);
}

.markdown-output pre {
    background-color: var(--color-background-tertiary) !important;
    border: 1px solid var(--block-border-color);
}

.markdown-output ul, 
.markdown-output ol {
    color: var(--text-color);
}

button {
    background: var(--primary) !important;
    color: black !important;
    font-weight: bold !important;
}

button:hover {
    background: var(--primary-300) !important;
}
"""

# Dark theme: Gradio's default theme with every hue set to green, followed by
# explicit overrides for the background, accent border and text colours.
_theme_hues = dict.fromkeys(("primary_hue", "secondary_hue", "neutral_hue"), "green")
_theme_overrides = {
    "background_fill_primary": "#000000",
    "background_fill_secondary": "#111111",
    "block_background_fill": "#111111",
    "border_color_accent": "#00aa00",
    "block_label_text_color": "#00ff00",
    "body_text_color": "#00ff00",
    "button_primary_text_color": "#000000",
}
dark_green_theme = gr.themes.Default(**_theme_hues).set(**_theme_overrides)

# --- GRADIO INTERFACE ---
# Page layout: header, a row with the API key and prompt inputs, the file
# upload, the submit button and the Markdown result area.
with gr.Blocks(
    title="DocSum - Document Summarizer",
    theme=dark_green_theme,
    css=custom_css
) as demo:
    gr.Markdown("# 🧾 DocSum")
    gr.Markdown("Document Summarizer Powered by VLM • Developed by [Koshur AI](https://koshurai.com)")

    with gr.Row():
        # type="password" masks the key while it is being typed.
        api_key = gr.Textbox(
            label="🔑 OpenRouter API Key",
            type="password",
            placeholder="Enter your OpenRouter API key"
        )
        user_prompt = gr.Textbox(
            label="📝 Enter Your Prompt",
            value="Extract all content structurally",
            placeholder="What would you like to extract?"
        )

    uploaded_file = gr.File(
        label="Upload Document (PDF/Image)",
        file_types=[".pdf", ".jpg", ".jpeg", ".png"]
    )

    submit_btn = gr.Button("🔍 Analyze Document", variant="primary")

    # The "markdown-output" class hooks this component up to the rules of the
    # same name in custom_css.
    output = gr.Markdown(
        label="Analysis Results",
        elem_classes=["markdown-output"]
    )

    # Clicking the button runs the analysis and renders its Markdown result.
    submit_btn.click(
        fn=analyze_document,
        inputs=[api_key, user_prompt, uploaded_file],
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()