updated for resume translation
Browse files- app.py +43 -16
- sample_resume.pdf +0 -0
app.py
CHANGED
@@ -16,7 +16,7 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
|
|
16 |
logger.debug("DWANI_API_KEY: %s", "Set" if dwani.api_key else "Not set")
|
17 |
logger.debug("DWANI_API_BASE_URL: %s", dwani.api_base)
|
18 |
|
19 |
-
# Language options for dropdowns
|
20 |
language_options = [
|
21 |
("English", "eng_Latn"),
|
22 |
("Kannada", "kan_Knda"),
|
@@ -60,18 +60,35 @@ def parse_page_numbers(pages_str):
|
|
60 |
|
61 |
def results_to_markdown(results):
|
62 |
"""
|
63 |
-
Convert the results dictionary into a Markdown formatted string
|
|
|
64 |
"""
|
65 |
md_lines = []
|
66 |
for page, content in results.items():
|
67 |
-
md_lines.append(f"## {page}")
|
68 |
if "error" in content:
|
69 |
-
md_lines.append(f"**Error:** {content['error']}")
|
70 |
else:
|
71 |
-
md_lines.append(
|
72 |
-
md_lines.append(
|
73 |
-
md_lines.append(
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
md_lines.append("\n---\n")
|
76 |
return "\n".join(md_lines)
|
77 |
|
@@ -108,24 +125,34 @@ def process_pdf(pdf_file, pages_str, prompt, src_lang, tgt_lang):
|
|
108 |
logger.debug("Calling API with file: %s, pages: %s, prompt: %s, src_lang: %s, tgt_lang: %s",
|
109 |
file_path, pages, prompt, src_lang_code, tgt_lang_code)
|
110 |
|
111 |
-
system_prompt = "Do not return any asterisk"
|
112 |
-
|
113 |
results = {}
|
114 |
for page_number in pages:
|
115 |
try:
|
116 |
-
result = dwani.Documents.
|
117 |
file_path=file_path,
|
118 |
-
prompt=f"{prompt} {system_prompt}",
|
119 |
page_number=page_number,
|
120 |
src_lang=src_lang_code,
|
121 |
tgt_lang=tgt_lang_code
|
122 |
)
|
123 |
logger.debug("API response for page %d: %s", page_number, result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
results[f"Page {page_number}"] = {
|
125 |
-
"
|
126 |
-
"
|
127 |
-
"
|
128 |
-
|
|
|
129 |
}
|
130 |
except dwani.exceptions.DhwaniAPIError as e:
|
131 |
logger.error("Dhwani API error on page %d: %s", page_number, str(e))
|
|
|
16 |
logger.debug("DWANI_API_KEY: %s", "Set" if dwani.api_key else "Not set")
|
17 |
logger.debug("DWANI_API_BASE_URL: %s", dwani.api_base)
|
18 |
|
19 |
+
# Language options for dropdowns (display name and code)
|
20 |
language_options = [
|
21 |
("English", "eng_Latn"),
|
22 |
("Kannada", "kan_Knda"),
|
|
|
60 |
|
61 |
def results_to_markdown(results):
    """
    Convert the results dictionary into a Markdown formatted string.

    Each entry of ``results`` maps a page label (e.g. ``"Page 3"``) to a dict
    that either holds an ``"error"`` message or the keys ``"Original Text"``,
    ``"Response"``, ``"Processed Page"`` and ``"Translated Response"``.

    The original text is emitted inside a fenced code block and the translated
    text inside ``<pre>`` tags so that line structure is preserved.

    Args:
        results: Mapping of page label to a per-page result dict.

    Returns:
        A single Markdown string with one section per page, each followed by
        a horizontal rule. An empty mapping yields an empty string.
    """
    # Function-scope import: the file's top-level import block is not visible
    # from this section, so the dependency is kept local to the function.
    import html

    md_lines = []
    for page, content in results.items():
        md_lines.append(f"## {page}\n")
        if "error" in content:
            md_lines.append(f"**Error:** {content['error']}\n")
        else:
            # `or ''` also covers a key that is present but None, which
            # `.get(key, '')` alone would let through and crash on `+`.
            original = content.get('Original Text') or ''
            md_lines.append("**Original Text:**\n\n```")
            md_lines.append(str(original) + "\n")
            md_lines.append("```\n")

            response_text = content.get('Response', '')
            if response_text:
                md_lines.append("Response:\n\n" + str(response_text) + "\n")

            md_lines.append("**Processed Page:** " + str(content.get('Processed Page', '')) + "\n")

            translated = str(content.get('Translated Response') or '')

            # Normalize newlines
            translated = translated.replace('\r\n', '\n').replace('\r', '\n')

            # Use <pre> tags to preserve formatting exactly. The text is
            # escaped so that '<', '>' and '&' in the content are displayed
            # literally instead of being interpreted as HTML markup.
            md_lines.append("**Translated Response:**\n\n<pre>")
            md_lines.append(html.escape(translated, quote=False))
            md_lines.append("</pre>")

        md_lines.append("\n---\n")
    return "\n".join(md_lines)
|
94 |
|
|
|
125 |
logger.debug("Calling API with file: %s, pages: %s, prompt: %s, src_lang: %s, tgt_lang: %s",
|
126 |
file_path, pages, prompt, src_lang_code, tgt_lang_code)
|
127 |
|
|
|
|
|
128 |
results = {}
|
129 |
for page_number in pages:
|
130 |
try:
|
131 |
+
result = dwani.Documents.run_extract(
|
132 |
file_path=file_path,
|
|
|
133 |
page_number=page_number,
|
134 |
src_lang=src_lang_code,
|
135 |
tgt_lang=tgt_lang_code
|
136 |
)
|
137 |
logger.debug("API response for page %d: %s", page_number, result)
|
138 |
+
|
139 |
+
# New response format: result contains 'pages' list
|
140 |
+
page_data = None
|
141 |
+
for p in result.get('pages', []):
|
142 |
+
if p.get('processed_page') == page_number:
|
143 |
+
page_data = p
|
144 |
+
break
|
145 |
+
|
146 |
+
if page_data is None:
|
147 |
+
results[f"Page {page_number}"] = {"error": "No data returned for this page"}
|
148 |
+
continue
|
149 |
+
|
150 |
results[f"Page {page_number}"] = {
|
151 |
+
"Processed Page": page_data.get("processed_page", "N/A"),
|
152 |
+
"Original Text": page_data.get("page_content", "N/A"),
|
153 |
+
"Translated Response": page_data.get("translated_content", "N/A"),
|
154 |
+
# The old 'Response' key is not in new data; set empty string
|
155 |
+
"Response": ""
|
156 |
}
|
157 |
except dwani.exceptions.DhwaniAPIError as e:
|
158 |
logger.error("Dhwani API error on page %d: %s", page_number, str(e))
|
sample_resume.pdf
ADDED
Binary file (74.3 kB). View file
|
|