File size: 8,194 Bytes
ae114b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
902cca3
ae114b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
902cca3
 
ae114b0
 
 
902cca3
ae114b0
902cca3
ae114b0
902cca3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae114b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
902cca3
ae114b0
 
 
 
 
 
902cca3
 
 
 
 
 
 
 
 
 
 
 
ae114b0
902cca3
 
 
 
 
ae114b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import gradio as gr
import os
import tempfile
import dwani
import logging

# Logging: configure the root logger at INFO and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# dwani client settings are read from the environment (None when a variable is unset)
dwani.api_key = os.environ.get("DWANI_API_KEY")
dwani.api_base = os.environ.get("DWANI_API_BASE_URL")

# Record whether the settings were found without printing the key itself.
# These are DEBUG records, i.e. below the INFO level requested above.
logger.debug(
    "DWANI_API_KEY: %s",
    "Set" if dwani.api_key else "Not set",
)
logger.debug(
    "DWANI_API_BASE_URL: %s",
    dwani.api_base,
)

# Supported languages as (display name, language code) pairs
language_options = [
    ("English", "eng_Latn"),
    ("Kannada", "kan_Knda"),
    ("Hindi", "hin_Deva")
]

# Display names only, in declaration order, for the Gradio dropdowns
language_names = [display_name for display_name, _ in language_options]

# Lookup table: display name -> language code
lang_code_map = dict(language_options)


def parse_page_numbers(pages_str):
    """
    Parse a string of comma-separated page numbers/ranges into a sorted list of unique integers.

    Example inputs:
        "1,3,5"   -> [1, 3, 5]
        "1-3,5"   -> [1, 2, 3, 5]

    Parts that cannot be parsed (non-numeric text, ranges with more than one
    dash, empty parts) are skipped, as are page numbers below 1 and ranges
    whose start is greater than their end.

    Args:
        pages_str: The page specification text. ``None`` or an empty string
            yields an empty list.

    Returns:
        A sorted list of unique page numbers (each >= 1); empty when nothing
        valid was found.
    """
    # A missing value (None) is treated like empty input instead of raising.
    if not pages_str:
        return []

    pages = set()
    for part in pages_str.split(","):
        part = part.strip()
        try:
            if "-" in part:
                # Unpacking raises ValueError for anything but exactly two
                # fields, so inputs such as "1-2-3" or "-4" are skipped.
                start, end = map(int, part.split("-"))
                if 1 <= start <= end:
                    pages.update(range(start, end + 1))
            else:
                page = int(part)
                if page >= 1:
                    pages.add(page)
        except ValueError:
            # Best-effort parsing: ignore the malformed part, keep the rest.
            continue
    return sorted(pages)


def results_to_markdown(results):
    """
    Convert the results dictionary into a Markdown formatted string,
    formatting the translated response to preserve structure using <pre> tags.

    Args:
        results: Mapping of a page label (used as the section heading) to a
            dict. A dict with an "error" key is rendered as an error line;
            otherwise the keys "Original Text", "Response", "Processed Page"
            and "Translated Response" are read, each optional.

    Returns:
        The Markdown text, one section per entry in iteration order, each
        followed by a horizontal rule. An empty mapping yields "".
    """
    def as_text(value):
        # dict.get(key, default) still returns None when the key is present
        # with a null value, so normalise here before concatenating.
        return "" if value is None else str(value)

    md_lines = []
    for page, content in results.items():
        md_lines.append(f"## {page}\n")
        if "error" in content:
            md_lines.append(f"**Error:** {content['error']}\n")
        else:
            # Original text goes inside a fenced code block
            md_lines.append("**Original Text:**\n\n```")
            md_lines.append(as_text(content.get('Original Text', '')) + "\n")
            md_lines.append("```\n")

            # The response section is emitted only when there is something to show
            response_text = content.get('Response', '')
            if response_text:
                md_lines.append("Response:\n\n" + as_text(response_text) + "\n")

            md_lines.append("**Processed Page:** " + str(content.get('Processed Page', '')) + "\n")

            translated = as_text(content.get('Translated Response', ''))

            # Normalize newlines
            translated = translated.replace('\r\n', '\n').replace('\r', '\n')

            # Use <pre> tags to preserve formatting exactly
            md_lines.append("**Translated Response:**\n\n<pre>")
            md_lines.append(translated)
            md_lines.append("</pre>")

        md_lines.append("\n---\n")
    return "\n".join(md_lines)


def process_pdf(pdf_file, pages_str, prompt, src_lang, tgt_lang):
    """
    Extract and translate the requested pages of a PDF and render the results as Markdown.

    Each requested page is sent to ``dwani.Documents.run_extract`` in its own
    call. A failure on one page is recorded under that page's heading and does
    not stop the remaining pages from being processed.

    NOTE: ``prompt`` is validated as non-empty but is not passed to
    ``run_extract``; only the file, page number and language codes are sent.

    Args:
        pdf_file: The uploaded file; either an object with a ``name``
            attribute holding the path, or the path itself.
        pages_str: Page specification such as "1,3,5" or "1-3".
        prompt: Prompt text from the UI; must not be blank.
        src_lang: Source language display name (a key of ``lang_code_map``).
        tgt_lang: Target language display name (a key of ``lang_code_map``).

    Returns:
        A ``(markdown_text, file_path)`` tuple. On a validation failure the
        first item is an "Error: ..." message and the second is ``None``;
        otherwise the second item is the path of a temporary ``.md`` file
        containing the same Markdown text.
    """
    logger.debug("Received inputs - PDF: %s, Pages: %s, Prompt: %s, Source Lang: %s, Target Lang: %s",
                 pdf_file, pages_str, prompt, src_lang, tgt_lang)

    # Validate inputs
    if not pdf_file:
        logger.error("No PDF file provided")
        return "Error: Please upload a PDF file", None

    # A missing prompt (None) is handled like a blank one instead of crashing on .strip()
    if not prompt or not prompt.strip():
        logger.error("Prompt is empty")
        return "Error: Please provide a non-empty prompt", None

    # Guard against a missing page specification before handing it to the parser
    pages = parse_page_numbers(pages_str or "")
    if not pages:
        logger.error("Invalid or empty page numbers input: %s", pages_str)
        return "Error: Please provide valid page numbers (e.g., 1,3,5 or 1-3)", None

    # Get language codes
    src_lang_code = lang_code_map.get(src_lang)
    tgt_lang_code = lang_code_map.get(tgt_lang)

    if not src_lang_code or not tgt_lang_code:
        logger.error("Invalid language selection - Source: %s, Target: %s", src_lang, tgt_lang)
        return "Error: Invalid source or target language selection", None

    # Get file path from Gradio File object
    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file

    logger.debug("Calling API with file: %s, pages: %s, prompt: %s, src_lang: %s, tgt_lang: %s",
                 file_path, pages, prompt, src_lang_code, tgt_lang_code)

    results = {}
    for page_number in pages:
        page_key = f"Page {page_number}"
        try:
            result = dwani.Documents.run_extract(
                file_path=file_path,
                page_number=page_number,
                src_lang=src_lang_code,
                tgt_lang=tgt_lang_code
            )
            logger.debug("API response for page %d: %s", page_number, result)

            # New response format: result contains 'pages' list; pick the
            # first entry whose 'processed_page' matches the requested page.
            page_data = next(
                (p for p in result.get('pages', []) if p.get('processed_page') == page_number),
                None,
            )

            if page_data is None:
                results[page_key] = {"error": "No data returned for this page"}
                continue

            results[page_key] = {
                "Processed Page": page_data.get("processed_page", "N/A"),
                "Original Text": page_data.get("page_content", "N/A"),
                "Translated Response": page_data.get("translated_content", "N/A"),
                # The old 'Response' key is not in new data; set empty string
                "Response": ""
            }
        except dwani.exceptions.DhwaniAPIError as e:
            logger.error("Dhwani API error on page %d: %s", page_number, e)
            results[page_key] = {"error": f"API error: {str(e)}"}
        except Exception as e:
            # Per-page boundary: keep going with the other pages, but log the
            # traceback so unexpected failures can be diagnosed.
            logger.exception("Unexpected error on page %d: %s", page_number, e)
            results[page_key] = {"error": f"Unexpected error: {str(e)}"}

    # Convert results to markdown text
    markdown_text = results_to_markdown(results)

    # Save markdown to a temporary file for download. delete=False keeps the
    # file on disk after it is closed; the context manager guarantees the
    # handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".md", mode='w', encoding='utf-8') as temp_md_file:
        temp_md_file.write(markdown_text)

    # Return markdown text and file path for download
    return markdown_text, temp_md_file.name


# Define Gradio interface
# `demo` is the Blocks app launched from the __main__ guard at the bottom of the file.
# Components are declared in display order, so statement order here matters.
with gr.Blocks(title="PDF Custom Prompt Processor with Multi-Page Support") as demo:
    gr.Markdown("# PDF Custom Prompt Processor")
    gr.Markdown("Upload a PDF, specify page numbers (comma-separated or ranges), enter a prompt, and select source and target languages.")

    with gr.Row():
        # First column: all user inputs plus the submit button
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            # Free-text page specification; parsed by parse_page_numbers inside process_pdf
            pages_input = gr.Textbox(
                label="Page Numbers",
                placeholder="e.g., 1,3,5 or 1-3",
                value="1",
                lines=1
            )
            prompt = gr.Textbox(
                label="Custom Prompt",
                placeholder="e.g., List the key points",
                value="List the key points",
                lines=3
            )
            # Dropdowns show display names; process_pdf maps them to codes via lang_code_map
            src_lang_input = gr.Dropdown(
                label="Source Language",
                choices=language_names,
                value="English"
            )
            tgt_lang_input = gr.Dropdown(
                label="Target Language",
                choices=language_names,
                value="Kannada"
            )
            submit_btn = gr.Button("Process")

        # Second column: rendered Markdown result and the downloadable .md file
        with gr.Column():
            output_md = gr.Markdown(label="Response (Markdown)")
            download_md = gr.File(label="Download Markdown File")

    # Wire the button to process_pdf. The inputs list order must match the
    # function's parameter order, and the outputs list order must match its
    # (markdown_text, file_path) return tuple.
    submit_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, pages_input, prompt, src_lang_input, tgt_lang_input],
        outputs=[output_md, download_md]
    )


# Script entry point: start the UI only when both dwani settings are present
if __name__ == "__main__":
    if dwani.api_key and dwani.api_base:
        logger.debug("Starting Gradio interface...")
        demo.launch()
    else:
        # Missing configuration: report it in the log and on stdout, then fall through
        logger.error("API key or base URL not set. Please set DWANI_API_KEY and DWANI_API_BASE_URL environment variables.")
        print("Error: Please set DWANI_API_KEY and DWANI_API_BASE_URL environment variables.")