saeedzou committed on
Commit
f24ece6
·
verified ·
1 Parent(s): 019296d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -2
app.py CHANGED
@@ -115,10 +115,67 @@ def summarize(transcript_text, word_count, model_sel, lang_sel):
115
 
116
  return response.text
117
 
118
- def punctuate(transcript, model_sel):
119
  client = genai.Client(api_key=GEMINI_API_KEY)
120
- prompt = f"Restore puncutations of the transcript from an ASR model given. Maintain the original content. Only reply with the output. text: \n{transcript}"
 
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  response = client.models.generate_content(
123
  model=model_sel,
124
  contents=[prompt]
 
115
 
116
  return response.text
117
 
118
+ def punctuate(transcript_text, model_sel):
119
  client = genai.Client(api_key=GEMINI_API_KEY)
120
+ prompt = f"""
121
+ Task: Punctuate the given Persian transcript text from an ASR model accurately according to Persian punctuation rules. Do not change any characters, correct mistakes, or modify the text in any way. You are ONLY allowed to add appropriate punctuations.
122
 
123
+ Guidelines:
124
+ 1. Period (نقطه .)
125
+ Ends a sentence.
126
+
127
+ Used after abbreviations (e.g., آی.ب.ام.).
128
+
129
+ 2. Comma (ویرگول ,)
130
+ Separates words/phrases in a list.
131
+
132
+ Used in compound or conditional sentences.
133
+
134
+ Prevents ambiguity (e.g., شلنگ مخصوص، آتش‌نشانی).
135
+
136
+ Surrounds parenthetical phrases (e.g., استیو جابز، بنیان‌گذار اپل،…).
137
+
138
+ Distinguishes adverbs (e.g., بعد از چندین ماه، ورزش کردم).
139
+
140
+ Separates repeated words (e.g., آن کشور، کشور خوبی است).
141
+
142
+ 3. Semicolon (نقطه‌ویرگول ؛)
143
+ Connects related sentences when a full stop is too strong.
144
+
145
+ Separates clauses in complex lists (e.g., آلمان، ایتالیا و ژاپن؛ انگلیس، شوروی و آمریکا).
146
+
147
+ Used before explanatory phrases (e.g., فتوسنتز مهم است؛ یعنی…).
148
+
149
+ 4. Colon (دونقطه :)
150
+ Introduces explanations, lists, or direct quotes (e.g., او گفت: «من آماده‌ام.»).
151
+
152
+ 5. Ellipsis (سه‌نقطه …)
153
+ Indicates omitted words (e.g., فرهنگی، سیاسی، اجتماعی و …).
154
+
155
+ 6. Parentheses (پرانتز ())
156
+ Encloses extra information, dates, or clarifications (e.g., جنگ جهانی دوم (۱۹۴۵)).
157
+
158
+ 7. Quotation Marks (گیومه «»)
159
+ Encloses direct speech (e.g., او گفت: «سلام!»).
160
+
161
+ Highlights specific words (e.g., او را «نابغه» خواندند).
162
+
163
+ 8. Question Mark (علامت سؤال ؟)
164
+ Ends direct questions (e.g., آیا آمدی؟).
165
+
166
+ 9. Exclamation Mark (علامت تعجب !)
167
+ Expresses surprise, emphasis, or commands (e.g., چه پرنده زیبایی!).
168
+
169
+
170
+ Instructions:
171
+ Apply these punctuation rules without modifying the original text.
172
+
173
+ Do not correct typos, spelling, or grammar mistakes.
174
+
175
+ Only return the punctuated text as output—no explanations or additional comments.
176
+ Input:
177
+ {transcript_text}
178
+ """
179
  response = client.models.generate_content(
180
  model=model_sel,
181
  contents=[prompt]