🚸 🎨
Signed-off-by: peter szemraj <[email protected]>
- app.py +1 -0
- summarize.py +2 -0
- utils.py +3 -3
app.py
CHANGED
@@ -334,6 +334,7 @@ if __name__ == "__main__":
     uploaded_file = gr.File(
         label="File Upload",
         file_count="single",
+        file_types=[".txt", ".md", ".pdf"],
         type="file",
     )
     with gr.Row():
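For context, a minimal sketch of how the updated component might sit in the app's layout (the surrounding Blocks structure is assumed; type="file" matches the Gradio 3.x API used here):

import gradio as gr

with gr.Blocks() as demo:
    # file_types restricts the browser's file picker to text-like documents
    uploaded_file = gr.File(
        label="File Upload",
        file_count="single",
        file_types=[".txt", ".md", ".pdf"],
        type="file",
    )

demo.launch()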
summarize.py
CHANGED
@@ -114,7 +114,9 @@ def summarize_via_tokenbatches(
         tokenizer (): the tokenizer to use for summarization
         batch_length (int, optional): the length of each batch. Defaults to 2048.
         batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
+        min_batch_length (int, optional): the minimum length of each batch. Defaults to 512.
 
+        **kwargs: any additional arguments to pass to the model for inference
     Returns:
         list: a list of dictionaries containing the input tokens, the summary, and the summary score
     """
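The function body is not part of this diff, so the helper below is a hypothetical illustration of what the newly documented parameters imply: overlapping token batches, with a minimum size for the final batch (make_token_batches is an invented name):

def make_token_batches(
    input_ids: list,
    batch_length: int = 2048,
    batch_stride: int = 16,
    min_batch_length: int = 512,
) -> list:
    """Split token ids into overlapping batches (hypothetical sketch)."""
    batches = []
    step = batch_length - batch_stride  # consecutive batches share batch_stride tokens
    for start in range(0, len(input_ids), step):
        batch = input_ids[start : start + batch_length]
        if len(batch) < min_batch_length and batches:
            # fold a too-short tail into the previous batch rather than
            # emitting a tiny final chunk; skip the tokens it already holds
            batches[-1] = batches[-1] + batch[batch_stride:]
            break
        batches.append(batch)
    return batches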
utils.py
CHANGED
@@ -156,7 +156,7 @@ def extract_keywords(
     for keyword in keywords:
         if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
             final_keywords.append(keyword)
-    logger.
+    logger.debug(f"Keywords (final):\t{final_keywords}")
     return final_keywords
 
 
@@ -178,9 +178,9 @@ def saves_summary(
     full_summary = "\n".join(sum_text)
 
     keywords = "_".join(extract_keywords(full_summary))
-    logger.
+    logger.debug(f"kw:\t{keywords}")
     outpath = (
-        Path.cwd() / f"document_summary_{get_timestamp()}.txt"
+        Path.cwd() / f"document_summary_{keywords}_{get_timestamp()}.txt"
         if outpath is None
         else Path(outpath)
     )
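Taken together, the utils.py changes log the deduplicated keyword list and bake it into the default output filename. A self-contained sketch of that pattern, assuming rapidfuzz provides fuzz.ratio (utils.py may import it from thefuzz instead) and using a stand-in for the project's get_timestamp helper:

import logging
from datetime import datetime
from pathlib import Path

from rapidfuzz import fuzz  # assumption: utils.py may use thefuzz instead

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def dedupe_keywords(keywords: list) -> list:
    """Keep a keyword only if it is at most 70% similar to every keyword kept so far."""
    final_keywords = []
    for keyword in keywords:
        if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
            final_keywords.append(keyword)
    logger.debug(f"Keywords (final):\t{final_keywords}")
    return final_keywords

def get_timestamp() -> str:
    return datetime.now().strftime("%b-%d-%Y_%H-%M")  # stand-in for the real helper

keywords = "_".join(dedupe_keywords(["token batches", "token batch", "summarization"]))
outpath = Path.cwd() / f"document_summary_{keywords}_{get_timestamp()}.txt"
print(outpath)  # e.g. .../document_summary_token batches_summarization_<timestamp>.txt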