Sahana31 committed on
Commit
54062db
·
1 Parent(s): 7b4c1dc

translated resume

Browse files
Files changed (2) hide show
  1. app.py +210 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import logging
3
+ import dwani
4
+ import os
5
+
6
# Emit INFO-and-above records and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configure the dwani client with the API key and base URL taken from the
# environment; each value is None when its variable is not set.
dwani.api_key = os.environ.get("DWANI_API_KEY")
dwani.api_base = os.environ.get("DWANI_API_BASE_URL")
13
+
14
+
15
def translate_to_kannada(text):
    """Translate English text to Kannada via ``dwani.Translate.run_translate``.

    Args:
        text: English text to translate. ``None``, empty and
            whitespace-only values are returned as an empty string without
            calling the API.

    Returns:
        The stripped translation. A dict response is read from its
        ``"translated_text"`` entry, else from the string items of its
        ``"translations"`` list (joined with spaces); any other response is
        converted with ``str``. If anything raises, the error is logged and a
        ``"Translation error: ..."`` message is returned instead.
    """
    if not text or not text.strip():
        return ""

    try:
        response = dwani.Translate.run_translate(
            sentences=text,
            src_lang="english",
            tgt_lang="kannada",
        )

        if isinstance(response, str):
            return response.strip()
        if not isinstance(response, dict):
            return str(response).strip()

        # Dict response: prefer the single-string field when it is non-empty.
        single = response.get("translated_text")
        if single:
            return single.strip()

        # Otherwise stitch together whatever string pieces the list holds.
        pieces = response.get("translations")
        if isinstance(pieces, list):
            return " ".join(piece.strip() for piece in pieces if isinstance(piece, str))

        return str(response).strip()
    except Exception as e:
        # Best effort: report the failure in place of the translation.
        logger.error(f"Translation error: {e}")
        return f"Translation error: {e}"
40
+
41
+
42
def process_pdf(pdf_file):
    """Turn an uploaded resume PDF into a Kannada text file.

    Pages 1 and 2 are sent to ``dwani.Documents.run_extract``. The collected
    page texts are handed to the ``extract_*`` helpers to pull out each resume
    section, every section is translated with ``translate_to_kannada``, and
    the result is laid out by ``format_resume`` and written to disk.

    A failure on one page is logged and recorded in that page's entry instead
    of aborting the run.

    Args:
        pdf_file: A path string, or an object that exposes the path as
            ``.name`` (e.g. an uploaded-file wrapper). A falsy value means
            nothing was uploaded.

    Returns:
        Path of the generated ``resume.txt``, or ``None`` when no PDF was
        provided.
    """
    import tempfile  # local import: only this function needs it

    logger.debug("Received inputs - PDF: %s", pdf_file)

    if not pdf_file:
        logger.error("No PDF file provided")
        return None

    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
    # A tuple rather than a set, so pages are always handled in reading order.
    pages = (1, 2)
    src_lang_code = "eng_Latn"
    tgt_lang_code = "kan_Knda"

    results = {}
    for page_number in pages:
        page_key = f"Page {page_number}"
        try:
            result = dwani.Documents.run_extract(
                file_path=file_path,
                page_number=page_number,
                src_lang=src_lang_code,
                tgt_lang=tgt_lang_code
            )
            # Pick the entry whose 'processed_page' matches the page we asked for.
            page_data = next(
                (p for p in result.get('pages', []) if p.get('processed_page') == page_number),
                None,
            )

            if page_data is None:
                results[page_key] = {"error": "No data returned for this page"}
                continue

            results[page_key] = {
                "Original Text": page_data.get("page_content", "N/A"),
                "Response": ""
            }
        except dwani.exceptions.DwaniAPIError as e:
            logger.error("Dhwani API error on page %s: %s", page_number, e)
            results[page_key] = {"error": f"API error: {str(e)}"}
        except Exception as e:
            # Keep going: one unreadable page should not lose the other.
            logger.error("Unexpected error on page %s: %s", page_number, e)
            results[page_key] = {"error": f"Unexpected error: {str(e)}"}

    contact_en = extract_contact_details(results)
    objective_en = extract_objective(results)
    education_en = extract_education_details(results)
    work_experience_en = extract_workexperience_details(results)
    skills_en = extract_skill(results)
    certifications_en = extract_certifications(results)

    contact_kan = translate_to_kannada(contact_en)
    objective_kan = translate_to_kannada(objective_en)
    education_kan = translate_to_kannada(education_en)
    work_experience_kan = translate_to_kannada(work_experience_en)
    skills_kan = translate_to_kannada(skills_en)
    certifications_kan = translate_to_kannada(certifications_en)

    formatted_resume = format_resume(
        contact_kan,
        objective_kan,
        education_kan,
        work_experience_kan,
        skills_kan,
        certifications_kan,
    )

    # Write into a fresh per-request directory: a fixed "resume.txt" in the
    # working directory would be overwritten by any concurrent request. The
    # file keeps the name "resume.txt" so the download name is unchanged.
    # NOTE: the directory is not removed here; it lives under the system
    # temp location and is left to normal temp-file cleanup.
    output_dir = tempfile.mkdtemp(prefix="resume_")
    text_filename = os.path.join(output_dir, "resume.txt")
    with open(text_filename, "w", encoding="utf-8") as f:
        f.write(formatted_resume)

    return text_filename
105
+
106
+
107
def extract_text_from_response(chat_response):
    """Reduce a chat response to plain text.

    Args:
        chat_response: A string, a dict, or any other object.

    Returns:
        The string itself; for a dict, the first of its ``"text"``,
        ``"response"`` or ``"content"`` entries that holds a string; in every
        other case ``str(chat_response)``.
    """
    if isinstance(chat_response, str):
        return chat_response

    if isinstance(chat_response, dict):
        for field in ("text", "response", "content"):
            candidate = chat_response.get(field)
            if isinstance(candidate, str):
                return candidate

    # Non-dict objects and dicts without a usable field fall back to str().
    return str(chat_response)
117
+
118
+
119
# Chat model used for every section-extraction prompt.
_SECTION_MODEL = "gemma3"


def _ask_about_resume(extracted_resume, instruction):
    """Send the resume followed by one instruction to the chat model.

    Args:
        extracted_resume: The extracted resume data; it is converted with
            ``str`` before being placed in the prompt.
        instruction: Text appended to the resume to tell the model which
            section to return.

    Returns:
        The model's reply reduced to plain text by
        ``extract_text_from_response``.
    """
    prompt = str(extracted_resume) + instruction
    response = dwani.Chat.direct(prompt=prompt, model=_SECTION_MODEL)
    return extract_text_from_response(response)


def extract_contact_details(extracted_resume):
    """Ask the chat model for the contact details in the resume."""
    return _ask_about_resume(
        extracted_resume, " return only contact details from the resume "
    )


def extract_objective(extracted_resume):
    """Ask the chat model for the objective or professional summary."""
    return _ask_about_resume(
        extracted_resume,
        " return only objective or professional summary from the resume ",
    )


def extract_education_details(extracted_resume):
    """Ask the chat model for the education details in the resume."""
    return _ask_about_resume(
        extracted_resume, " return only education details from the resume "
    )


def extract_workexperience_details(extracted_resume):
    """Ask the chat model for the work experience in the resume."""
    return _ask_about_resume(
        extracted_resume, " return only work experience from the resume "
    )


def extract_skill(extracted_resume):
    """Ask the chat model for the skills in the resume."""
    return _ask_about_resume(
        extracted_resume, " return only skills from the resume "
    )


def extract_certifications(extracted_resume):
    """Ask the chat model for the certifications in the resume."""
    return _ask_about_resume(
        extracted_resume, " return only certifications from the resume "
    )
159
+
160
+
161
def safe_strip(value):
    """Return ``value`` as text with surrounding whitespace removed.

    A dict is first reduced to text with ``extract_text_from_response``;
    anything else is converted with ``str``.
    """
    text = extract_text_from_response(value) if isinstance(value, dict) else value
    return str(text).strip()
165
+
166
+
167
def format_resume(contact, objective, education, work_experience, skills, certifications):
    """Lay the resume sections out as a Markdown document.

    Each argument is cleaned with ``safe_strip`` and placed under its own
    bilingual (Kannada / English) ``##`` heading, in the order of the
    parameters, below a ``# Resume (Kannada)`` title.

    Returns:
        The complete document as one string ending with a newline.
    """
    sections = (
        ("ಸಂಪರ್ಕ ವಿವರಗಳು (Contact Details)", contact),
        ("ಉದ್ದೇಶ (Objective)", objective),
        ("ಶಿಕ್ಷಣ (Education)", education),
        ("ಕೆಲಸದ ಅನುಭವ (Work Experience)", work_experience),
        ("ಕೌಶಲ್ಯಗಳು (Skills)", skills),
        ("ಪ್ರಮಾಣಪತ್ರಗಳು (Certifications)", certifications),
    )

    # Title first, then one "heading + body" chunk per section; chunks are
    # separated by a blank line.
    chunks = ["# Resume (Kannada)"]
    for heading, body in sections:
        chunks.append(f"## {heading}\n{safe_strip(body)}")

    return "\n\n".join(chunks) + "\n"
188
+
189
+
190
# Page layout: a title and hint, then one row with two columns - the upload
# controls in the first, the generated file in the second.
with gr.Blocks(title="Resume Translator with Kannada Translation") as resume_translator:
    gr.Markdown("# Resume Upload")
    gr.Markdown("Upload a Resume PDF to extract, translate to Kannada, and download.")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload Resume", file_types=[".pdf"])
            submit_btn = gr.Button("Process")

        with gr.Column():
            text_output = gr.File(label="Download Formatted Resume (.txt)")

    # Clicking "Process" runs process_pdf on the upload and offers the
    # returned file path for download.
    submit_btn.click(fn=process_pdf, inputs=[pdf_input], outputs=text_output)


if __name__ == "__main__":
    # Launch the app only when executed as a script, not on import.
    resume_translator.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Third-party packages only. "logging" is part of the Python standard
# library and must not be listed here: the PyPI package of that name is an
# old Python 2 distribution that fails to install on Python 3.
dwani
gradio
requests