Sahana31 committed on
Commit
902cca3
·
1 Parent(s): 2c4d9cc

updated for resume translation

Browse files
Files changed (2) hide show
  1. app.py +43 -16
  2. sample_resume.pdf +0 -0
app.py CHANGED
@@ -16,7 +16,7 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
16
  logger.debug("DWANI_API_KEY: %s", "Set" if dwani.api_key else "Not set")
17
  logger.debug("DWANI_API_BASE_URL: %s", dwani.api_base)
18
 
19
- # Language options for dropdowns
20
  language_options = [
21
  ("English", "eng_Latn"),
22
  ("Kannada", "kan_Knda"),
@@ -60,18 +60,35 @@ def parse_page_numbers(pages_str):
60
 
61
  def results_to_markdown(results):
62
  """
63
- Convert the results dictionary into a Markdown formatted string.
 
64
  """
65
  md_lines = []
66
  for page, content in results.items():
67
- md_lines.append(f"## {page}")
68
  if "error" in content:
69
- md_lines.append(f"**Error:** {content['error']}")
70
  else:
71
- md_lines.append(f"**Original Text:**\n\n``````")
72
- md_lines.append(f"**Response:**\n\n{content.get('Response', '')}")
73
- md_lines.append(f"**Processed Page:** {content.get('Processed Page', '')}")
74
- md_lines.append(f"**Translated Response:**\n\n{content.get('Translated Response', '')}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  md_lines.append("\n---\n")
76
  return "\n".join(md_lines)
77
 
@@ -108,24 +125,34 @@ def process_pdf(pdf_file, pages_str, prompt, src_lang, tgt_lang):
108
  logger.debug("Calling API with file: %s, pages: %s, prompt: %s, src_lang: %s, tgt_lang: %s",
109
  file_path, pages, prompt, src_lang_code, tgt_lang_code)
110
 
111
- system_prompt = "Do not return any asterisk"
112
-
113
  results = {}
114
  for page_number in pages:
115
  try:
116
- result = dwani.Documents.run_doc_query(
117
  file_path=file_path,
118
- prompt=f"{prompt} {system_prompt}",
119
  page_number=page_number,
120
  src_lang=src_lang_code,
121
  tgt_lang=tgt_lang_code
122
  )
123
  logger.debug("API response for page %d: %s", page_number, result)
 
 
 
 
 
 
 
 
 
 
 
 
124
  results[f"Page {page_number}"] = {
125
- "Original Text": result.get("original_text", "N/A"),
126
- "Response": result.get("response", "N/A"),
127
- "Processed Page": result.get("processed_page", "N/A"),
128
- "Translated Response": result.get("translated_response", "N/A")
 
129
  }
130
  except dwani.exceptions.DhwaniAPIError as e:
131
  logger.error("Dhwani API error on page %d: %s", page_number, str(e))
 
16
  logger.debug("DWANI_API_KEY: %s", "Set" if dwani.api_key else "Not set")
17
  logger.debug("DWANI_API_BASE_URL: %s", dwani.api_base)
18
 
19
+ # Language options for dropdowns (display name and code)
20
  language_options = [
21
  ("English", "eng_Latn"),
22
  ("Kannada", "kan_Knda"),
 
60
 
61
  def results_to_markdown(results):
62
  """
63
+ Convert the results dictionary into a Markdown formatted string,
64
+ formatting the translated response to preserve structure using <pre> tags.
65
  """
66
  md_lines = []
67
  for page, content in results.items():
68
+ md_lines.append(f"## {page}\n")
69
  if "error" in content:
70
+ md_lines.append(f"**Error:** {content['error']}\n")
71
  else:
72
+ md_lines.append("**Original Text:**\n\n```")
73
+ md_lines.append(content.get('Original Text', '') + "\n")
74
+ md_lines.append("```\n")
75
+
76
+ response_text = content.get('Response', '')
77
+ if response_text:
78
+ md_lines.append("Response:\n\n" + response_text + "\n")
79
+
80
+ md_lines.append("**Processed Page:** " + str(content.get('Processed Page', '')) + "\n")
81
+
82
+ translated = content.get('Translated Response', '')
83
+
84
+ # Normalize newlines
85
+ translated = translated.replace('\r\n', '\n').replace('\r', '\n')
86
+
87
+ # Use <pre> tags to preserve formatting exactly
88
+ md_lines.append("**Translated Response:**\n\n<pre>")
89
+ md_lines.append(translated)
90
+ md_lines.append("</pre>")
91
+
92
  md_lines.append("\n---\n")
93
  return "\n".join(md_lines)
94
 
 
125
  logger.debug("Calling API with file: %s, pages: %s, prompt: %s, src_lang: %s, tgt_lang: %s",
126
  file_path, pages, prompt, src_lang_code, tgt_lang_code)
127
 
 
 
128
  results = {}
129
  for page_number in pages:
130
  try:
131
+ result = dwani.Documents.run_extract(
132
  file_path=file_path,
 
133
  page_number=page_number,
134
  src_lang=src_lang_code,
135
  tgt_lang=tgt_lang_code
136
  )
137
  logger.debug("API response for page %d: %s", page_number, result)
138
+
139
+ # New response format: result contains 'pages' list
140
+ page_data = None
141
+ for p in result.get('pages', []):
142
+ if p.get('processed_page') == page_number:
143
+ page_data = p
144
+ break
145
+
146
+ if page_data is None:
147
+ results[f"Page {page_number}"] = {"error": "No data returned for this page"}
148
+ continue
149
+
150
  results[f"Page {page_number}"] = {
151
+ "Processed Page": page_data.get("processed_page", "N/A"),
152
+ "Original Text": page_data.get("page_content", "N/A"),
153
+ "Translated Response": page_data.get("translated_content", "N/A"),
154
+ # The old 'Response' key is not in new data; set empty string
155
+ "Response": ""
156
  }
157
  except dwani.exceptions.DhwaniAPIError as e:
158
  logger.error("Dhwani API error on page %d: %s", page_number, str(e))
sample_resume.pdf ADDED
Binary file (74.3 kB). View file