andreinigo committed on
Commit
c572b2e
·
1 Parent(s): 1b52241

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -23
app.py CHANGED
@@ -93,11 +93,6 @@ def vtt_to_clean_file(file_in: str, file_out=None, **kwargs) -> str:
93
  return file_out
94
 
95
 
96
- def get_summary(filepath):
97
- filepath = filepath
98
- vtt_to_clean_file(filepath)
99
-
100
-
101
  def count_tokens(filename):
102
  with open(filename, 'r') as f:
103
  text = f.read()
@@ -117,11 +112,14 @@ def break_up_file(tokens, chunk_size, overlap_size):
117
  def break_up_file_to_chunks(filename, chunk_size=4000, overlap_size=100):
118
  with open(filename, 'r') as f:
119
  text = f.read()
 
120
  tokens = word_tokenize(text)
121
  return list(break_up_file(tokens, chunk_size, overlap_size))
122
 
123
 
124
  def convert_to_prompt_text(tokenized_text):
 
 
125
  prompt_text = " ".join(tokenized_text)
126
  prompt_text = prompt_text.replace(" 's", "'s")
127
  return prompt_text
@@ -138,43 +136,59 @@ def summarize_meeting(filepath):
138
  # Break the text of the meeting transcripts into chunks.
139
  chunks = break_up_file_to_chunks(filename)
140
  # Summarize each chunk.
 
141
  for i, chunk in enumerate(chunks):
142
- prompt_request = convert_to_prompt_text(chunks[i])
 
 
 
 
143
 
144
- messages = [
145
- {"role": "system", "content": "Summarize this meeting transcript in the same language as the user's input."}]
146
- messages.append({"role": "user", "content": prompt_request})
 
 
 
 
147
 
 
 
 
 
 
 
148
  response = openai.ChatCompletion.create(
149
- model="gpt-4",
150
- messages=messages,
151
- temperature=.4,
 
 
152
  top_p=1,
153
  frequency_penalty=0,
154
  presence_penalty=0
155
  )
156
 
157
- prompt_response.append(
158
- response["choices"][0]["message"]['content'].strip())
159
 
160
- # Consolidate these meeting summaries.
161
- prompt_request = str(prompt_response)
162
 
163
- # Summarize the text of the meeting transcripts.
164
- messages = [{"role": "system", "content": "Consolidate, and summarize the text of the meeting transcripts. The output format should be markdown in the same language as the user's input. Start with a brief summary of the meeting, continue with bullets outlining the most important points of discussion. Finally, provide a table to show the list of action items with 3 columns: Action, Assigned Person, Due Date."}]
165
- messages.append({"role": "user", "content": prompt_request})
166
  response = openai.ChatCompletion.create(
167
  model="gpt-4",
168
- messages=messages,
169
- temperature=.3,
 
 
170
  top_p=1,
171
  frequency_penalty=0,
172
  presence_penalty=0
173
  )
174
 
175
- summary_text = response["choices"][0]["message"]['content'].strip()
176
 
177
- return summary_text
 
178
 
179
 
180
  def summarize_meeting_vtt(file):
 
93
  return file_out
94
 
95
 
 
 
 
 
 
96
  def count_tokens(filename):
97
  with open(filename, 'r') as f:
98
  text = f.read()
 
112
def break_up_file_to_chunks(filename, chunk_size=4000, overlap_size=100):
    """Read *filename* and split its tokenized text into overlapping chunks.

    The file is read as text, tokenized with ``word_tokenize``, and the
    resulting token list is handed to ``break_up_file``, which yields
    successive windows of at most ``chunk_size`` tokens, each overlapping
    the previous window by ``overlap_size`` tokens.

    Returns a list of token chunks (each chunk is a list of tokens).
    """
    with open(filename, 'r') as source:
        contents = source.read()

    token_stream = word_tokenize(contents)
    return list(break_up_file(token_stream, chunk_size, overlap_size))
118
 
119
 
120
def convert_to_prompt_text(tokenized_text):
    """Join a list of word tokens back into a single prompt string.

    Tokens that contain three or more digits anywhere in the string
    (e.g. VTT timestamps such as ``00:01:23.500``) are dropped so they do
    not pollute the summarization prompt; small numbers actually spoken
    in the meeting (``"3"``, ``"42"``) are kept.

    Parameters:
        tokenized_text: list of str tokens, typically produced by
            ``word_tokenize``.

    Returns:
        A single string with the surviving tokens separated by spaces and
        the tokenizer artifact ``" 's"`` re-attached to the preceding word.
    """
    # BUG FIX: the original comment stated the intent "remove strings that
    # have at least 3 digits anywhere in the string", but the code removed
    # any token containing even a single digit, losing legitimate numbers.
    tokenized_text = [
        token for token in tokenized_text
        if sum(ch.isdigit() for ch in token) < 3
    ]
    prompt_text = " ".join(tokenized_text)
    prompt_text = prompt_text.replace(" 's", "'s")
    return prompt_text
 
136
  # Break the text of the meeting transcripts into chunks.
137
  chunks = break_up_file_to_chunks(filename)
138
  # Summarize each chunk.
139
+ # Resumir cada fragmento.
140
  for i, chunk in enumerate(chunks):
141
+ print(i)
142
+ print(chunk)
143
+ prompt_request = convert_to_prompt_text(chunk)
144
+ print(prompt_request)
145
+ prompt_request = "Resume brevemente esta transcripción de la reunión en el mismo idioma que la entrada del usuario: " + prompt_request
146
 
147
+ response = openai.ChatCompletion.create(
148
+ model="gpt-3.5-turbo",
149
+ messages=[
150
+ {"role": "user", "content": prompt_request}
151
+ ],
152
+ temperature=.3
153
+ )
154
 
155
+ prompt_response.append(response["choices"][0]["message"]['content'].strip())
156
+
157
+ # Consolidar estos resúmenes de la reunión.
158
+ consolidated_summary = []
159
+ for summary in prompt_response:
160
+ prompt_request = "Resume el siguiente texto: " + summary
161
  response = openai.ChatCompletion.create(
162
+ model="gpt-3.5-turbo",
163
+ messages=[
164
+ {"role": "user", "content": prompt_request}
165
+ ],
166
+ temperature=.1,
167
  top_p=1,
168
  frequency_penalty=0,
169
  presence_penalty=0
170
  )
171
 
172
+ consolidated_summary.append(response["choices"][0]["message"]['content'].strip())
 
173
 
174
+ # Consolidar el resumen usando GPT-4
175
+ final_summary_request = " ".join(consolidated_summary)
176
 
 
 
 
177
  response = openai.ChatCompletion.create(
178
  model="gpt-4",
179
+ messages=[{"role": "system", "content": "Consolidar y resumir el texto de las transcripciones de la reunión. El formato de salida debe ser markdown en el mismo idioma que la entrada del usuario. Comenzar con un resumen breve de la reunión, continuar con puntos destacados que describan los aspectos más importantes de la discusión. Finalmente, proporcionar una tabla para mostrar la lista de acciones con 3 columnas: Acción, Persona Asignada, Fecha de Vencimiento."},
180
+ {"role": "user", "content": final_summary_request}
181
+ ],
182
+ temperature=.1,
183
  top_p=1,
184
  frequency_penalty=0,
185
  presence_penalty=0
186
  )
187
 
188
+ final_summary = response["choices"][0]["message"]['content'].strip()
189
 
190
+
191
+ return final_summary
192
 
193
 
194
  def summarize_meeting_vtt(file):