Commit
·
c572b2e
1
Parent(s):
1b52241
Update app.py
Browse files
app.py
CHANGED
@@ -93,11 +93,6 @@ def vtt_to_clean_file(file_in: str, file_out=None, **kwargs) -> str:
|
|
93 |
return file_out
|
94 |
|
95 |
|
96 |
-
def get_summary(filepath):
|
97 |
-
filepath = filepath
|
98 |
-
vtt_to_clean_file(filepath)
|
99 |
-
|
100 |
-
|
101 |
def count_tokens(filename):
|
102 |
with open(filename, 'r') as f:
|
103 |
text = f.read()
|
@@ -117,11 +112,14 @@ def break_up_file(tokens, chunk_size, overlap_size):
|
|
117 |
def break_up_file_to_chunks(filename, chunk_size=4000, overlap_size=100):
|
118 |
with open(filename, 'r') as f:
|
119 |
text = f.read()
|
|
|
120 |
tokens = word_tokenize(text)
|
121 |
return list(break_up_file(tokens, chunk_size, overlap_size))
|
122 |
|
123 |
|
124 |
def convert_to_prompt_text(tokenized_text):
|
|
|
|
|
125 |
prompt_text = " ".join(tokenized_text)
|
126 |
prompt_text = prompt_text.replace(" 's", "'s")
|
127 |
return prompt_text
|
@@ -138,43 +136,59 @@ def summarize_meeting(filepath):
|
|
138 |
# Break the text of the meeting transcripts into chunks.
|
139 |
chunks = break_up_file_to_chunks(filename)
|
140 |
# Summarize each chunk.
|
|
|
141 |
for i, chunk in enumerate(chunks):
|
142 |
-
|
|
|
|
|
|
|
|
|
143 |
|
144 |
-
|
145 |
-
|
146 |
-
|
|
|
|
|
|
|
|
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
response = openai.ChatCompletion.create(
|
149 |
-
model="gpt-
|
150 |
-
messages=
|
151 |
-
|
|
|
|
|
152 |
top_p=1,
|
153 |
frequency_penalty=0,
|
154 |
presence_penalty=0
|
155 |
)
|
156 |
|
157 |
-
|
158 |
-
response["choices"][0]["message"]['content'].strip())
|
159 |
|
160 |
-
#
|
161 |
-
|
162 |
|
163 |
-
# Summarize the text of the meeting transcripts.
|
164 |
-
messages = [{"role": "system", "content": "Consolidate, and summarize the text of the meeting transcripts. The output format should be markdown in the same language as the user's input. Start with a brief summary of the meeting, continue with bullets outlining the most important points of discussion. Finally, provide a table to show the list of action items with 3 columns: Action, Assigned Person, Due Date."}]
|
165 |
-
messages.append({"role": "user", "content": prompt_request})
|
166 |
response = openai.ChatCompletion.create(
|
167 |
model="gpt-4",
|
168 |
-
messages=
|
169 |
-
|
|
|
|
|
170 |
top_p=1,
|
171 |
frequency_penalty=0,
|
172 |
presence_penalty=0
|
173 |
)
|
174 |
|
175 |
-
|
176 |
|
177 |
-
|
|
|
178 |
|
179 |
|
180 |
def summarize_meeting_vtt(file):
|
|
|
93 |
return file_out
|
94 |
|
95 |
|
|
|
|
|
|
|
|
|
|
|
96 |
def count_tokens(filename):
|
97 |
with open(filename, 'r') as f:
|
98 |
text = f.read()
|
|
|
112 |
def break_up_file_to_chunks(filename, chunk_size=4000, overlap_size=100):
    """Read a transcript file and split its tokens into overlapping chunks.

    Args:
        filename: Path to the plain-text transcript file.
        chunk_size: Maximum number of tokens per chunk.
        overlap_size: Number of tokens shared between consecutive chunks,
            so sentences cut at a boundary still appear whole in one chunk.

    Returns:
        list: Token chunks as produced by ``break_up_file``.
    """
    # Explicit UTF-8: the default encoding is platform-dependent, which
    # breaks on non-ASCII transcripts under e.g. a cp1252 locale.
    with open(filename, 'r', encoding='utf-8') as f:
        text = f.read()

    # word_tokenize is NLTK's tokenizer (imported at module level).
    tokens = word_tokenize(text)
    return list(break_up_file(tokens, chunk_size, overlap_size))
|
118 |
|
119 |
|
120 |
def convert_to_prompt_text(tokenized_text):
    """Join a token list into a single prompt string.

    Tokens containing any digit are dropped (this strips VTT timestamps
    and cue numbers), then the remainder is space-joined and possessive
    tokenization artifacts (" 's") are re-attached.

    NOTE(review): the original Spanish comment said tokens with *at least
    3* digits should be removed, but the code drops tokens containing ANY
    digit. Behavior is preserved here as-is — confirm intended filter.

    Args:
        tokenized_text: Iterable of word tokens (e.g. from word_tokenize).

    Returns:
        str: The cleaned, space-joined prompt text.
    """
    digit_free = (tok for tok in tokenized_text
                  if all(not ch.isdigit() for ch in tok))
    joined = " ".join(digit_free)
    return joined.replace(" 's", "'s")
|
|
|
136 |
# Break the text of the meeting transcripts into chunks.
|
137 |
chunks = break_up_file_to_chunks(filename)
|
138 |
# Summarize each chunk.
|
139 |
+
# Resumir cada fragmento.
|
140 |
for i, chunk in enumerate(chunks):
|
141 |
+
print(i)
|
142 |
+
print(chunk)
|
143 |
+
prompt_request = convert_to_prompt_text(chunk)
|
144 |
+
print(prompt_request)
|
145 |
+
prompt_request = "Resume brevemente esta transcripción de la reunión en el mismo idioma que la entrada del usuario: " + prompt_request
|
146 |
|
147 |
+
response = openai.ChatCompletion.create(
|
148 |
+
model="gpt-3.5-turbo",
|
149 |
+
messages=[
|
150 |
+
{"role": "user", "content": prompt_request}
|
151 |
+
],
|
152 |
+
temperature=.3
|
153 |
+
)
|
154 |
|
155 |
+
prompt_response.append(response["choices"][0]["message"]['content'].strip())
|
156 |
+
|
157 |
+
# Consolidar estos resúmenes de la reunión.
|
158 |
+
consolidated_summary = []
|
159 |
+
for summary in prompt_response:
|
160 |
+
prompt_request = "Resume el siguiente texto: " + summary
|
161 |
response = openai.ChatCompletion.create(
|
162 |
+
model="gpt-3.5-turbo",
|
163 |
+
messages=[
|
164 |
+
{"role": "user", "content": prompt_request}
|
165 |
+
],
|
166 |
+
temperature=.1,
|
167 |
top_p=1,
|
168 |
frequency_penalty=0,
|
169 |
presence_penalty=0
|
170 |
)
|
171 |
|
172 |
+
consolidated_summary.append(response["choices"][0]["message"]['content'].strip())
|
|
|
173 |
|
174 |
+
# Consolidar el resumen usando GPT-4
|
175 |
+
final_summary_request = " ".join(consolidated_summary)
|
176 |
|
|
|
|
|
|
|
177 |
response = openai.ChatCompletion.create(
|
178 |
model="gpt-4",
|
179 |
+
messages=[{"role": "system", "content": "Consolidar y resumir el texto de las transcripciones de la reunión. El formato de salida debe ser markdown en el mismo idioma que la entrada del usuario. Comenzar con un resumen breve de la reunión, continuar con puntos destacados que describan los aspectos más importantes de la discusión. Finalmente, proporcionar una tabla para mostrar la lista de acciones con 3 columnas: Acción, Persona Asignada, Fecha de Vencimiento."},
|
180 |
+
{"role": "user", "content": final_summary_request}
|
181 |
+
],
|
182 |
+
temperature=.1,
|
183 |
top_p=1,
|
184 |
frequency_penalty=0,
|
185 |
presence_penalty=0
|
186 |
)
|
187 |
|
188 |
+
final_summary = response["choices"][0]["message"]['content'].strip()
|
189 |
|
190 |
+
|
191 |
+
return final_summary
|
192 |
|
193 |
|
194 |
def summarize_meeting_vtt(file):
|