Spaces: pszemraj/document-summarization (Runtime error)
Commit ef4c284 · Parent(s): e37a87f
Update app.py

app.py CHANGED
@@ -192,11 +192,11 @@ def predict(
 def proc_submission(
     input_text: str,
     model_name: str,
-    [5 removed parameter lines; content not captured in this view]
+    predrop_stopwords: bool = False,
+    repetition_penalty: float = 0.5,
+    no_repeat_ngram_size: int = 3,
+    length_penalty: float = 1.5,
+    token_batch_length: int = 1530,
     num_beams: int = 3,
     max_input_length: int = 8182,
 ):
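The new keyword arguments carry standard Hugging Face generation settings plus the app's batching knobs. As a rough illustration only, the sketch below shows how values like these are typically forwarded to a seq2seq model's generate() call; the checkpoint id and the wiring are assumptions for illustration, not something this commit shows.

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Placeholder checkpoint; the Space selects the model via its "model_name" dropdown.
model_id = "pszemraj/long-t5-tglobal-base-16384-book-summary"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# One token batch of the input document (max_length mirrors the token_batch_length default above).
batch = tokenizer(
    "text of one input batch ...",
    truncation=True,
    max_length=1530,
    return_tensors="pt",
)
summary_ids = model.generate(
    **batch,
    num_beams=3,
    repetition_penalty=0.5,
    no_repeat_ngram_size=3,
    length_penalty=1.5,
)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))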
@@ -503,8 +503,6 @@ if __name__ == "__main__":
         gr.Markdown("## Load Inputs & Select Parameters")
         gr.Markdown(
             """Enter/paste text below, or upload a file. Pick a model & adjust params (_optional_), and press **Summarize!**
-
-            # See [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for details.
             """
         )
         with gr.Row(variant="compact"):
|
@@ -559,7 +557,7 @@ if __name__ == "__main__":
|
|
| 559 |
# gr.Markdown(
|
| 560 |
# "_Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios._"
|
| 561 |
# )
|
| 562 |
-
output_text = gr.HTML("<p><em>
|
| 563 |
with gr.Column():
|
| 564 |
gr.Markdown("### Results & Scores")
|
| 565 |
with gr.Row():
|
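The runtime note now lives in a static gr.HTML component named output_text instead of the commented-out gr.Markdown above it. A minimal, self-contained sketch of that Gradio pattern follows; the button and stub handler are illustrative, not the app's actual layout.

import gradio as gr

with gr.Blocks() as demo:
    # Static note shown until an event handler overwrites the component's value.
    output_text = gr.HTML(
        "<p><em>Summarization should take ~1-2 minutes for most settings, "
        "but may extend up to 5-10 minutes in some scenarios.</em></p>"
    )
    summarize_button = gr.Button("Summarize!")
    # Stub handler for the sketch; the real app wires this to its summarization function.
    summarize_button.click(
        fn=lambda: "<p><em>Done.</em></p>",
        inputs=None,
        outputs=output_text,
    )

if __name__ == "__main__":
    demo.launch()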
@@ -587,81 +585,81 @@ if __name__ == "__main__":
                 label="Summary",
                 value="<center><i>Summary will appear here!</i></center>",
             )
-        with gr.Column():
-        [23 more removed lines; content not captured in this view]
+        # with gr.Column():
+        #     gr.Markdown("### **Aggregate Summary Batches**")
+        #     gr.Markdown(
+        #         "_Note: this is an experimental feature. Feedback welcome in the [discussions](https://hf.co/spaces/pszemraj/document-summarization/discussions)!_"
+        #     )
+        #     with gr.Row():
+        #         aggregate_button = gr.Button(
+        #             "Aggregate!",
+        #             variant="primary",
+        #         )
+        #         gr.Markdown(
+        #             f"""Aggregate the above batches into a cohesive summary.
+        #             - A secondary instruct-tuned LM consolidates info
+        #             - Current model: [{AGGREGATE_MODEL}](https://hf.co/{AGGREGATE_MODEL})
+        #             """
+        #         )
+        #     with gr.Column(variant="panel"):
+        #         aggregated_summary = gr.HTML(
+        #             label="Aggregate Summary",
+        #             value="<center><i>Aggregate summary will appear here!</i></center>",
+        #         )
+        #         gr.Markdown(
+        #             "\n\n_Aggregate summary is also appended to the bottom of the `.txt` file._"
+        #         )

         gr.Markdown("---")
-        with gr.Column():
-        [36 more removed lines; content not captured in this view]
-        with gr.Column():
-        [11 more removed lines; content not captured in this view]
+        # with gr.Column():
+        #     gr.Markdown("### Advanced Settings")
+        #     gr.Markdown(
+        #         "Refer to [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for what these are, and how they impact _quality_ and _speed_."
+        #     )
+        #     with gr.Row(variant="compact"):
+        #         length_penalty = gr.Slider(
+        #             minimum=0.3,
+        #             maximum=1.1,
+        #             label="length penalty",
+        #             value=0.7,
+        #             step=0.05,
+        #         )
+        #         token_batch_length = gr.Radio(
+        #             choices=TOKEN_BATCH_OPTIONS,
+        #             label="token batch length",
+        #             # select median option
+        #             value=TOKEN_BATCH_OPTIONS[len(TOKEN_BATCH_OPTIONS) // 2],
+        #         )
+
+        #     with gr.Row(variant="compact"):
+        #         repetition_penalty = gr.Slider(
+        #             minimum=1.0,
+        #             maximum=5.0,
+        #             label="repetition penalty",
+        #             value=1.5,
+        #             step=0.1,
+        #         )
+        #         no_repeat_ngram_size = gr.Radio(
+        #             choices=[2, 3, 4, 5],
+        #             label="no repeat ngram size",
+        #             value=3,
+        #         )
+        #         predrop_stopwords = gr.Checkbox(
+        #             label="Drop Stopwords (Pre-Truncation)",
+        #             value=False,
+        #         )
+        # with gr.Column():
+        #     gr.Markdown("## About")
+        #     gr.Markdown(
+        #         "- Models are fine-tuned on the [🅱️ookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that generalizes well and is useful for summarizing text in academic and everyday use."
+        #     )
+        #     gr.Markdown(
+        #         "- _Update April 2023:_ Additional models fine-tuned on the [PLOS](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-plos-norm) and [ELIFE](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-elife-norm) subsets of the [scientific lay summaries](https://arxiv.org/abs/2210.09932) dataset are available (see dropdown at the top)."
+        #     )
+        #     gr.Markdown(
+        #         "Adjust the max input words & max PDF pages for OCR by duplicating this space and [setting the environment variables](https://hf.co/docs/hub/spaces-overview#managing-secrets) `APP_MAX_WORDS` and `APP_OCR_MAX_PAGES` to the desired integer values."
+        #     )
+        #     gr.Markdown("---")

         # load_examples_button.click(
         #     fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
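Net effect on the call surface: the advanced-settings widgets are commented out of the UI, and proc_submission now carries those values as keyword defaults. The sketch below shows how callers interact with the updated signature; the stub body and the model name are placeholders, not code from this commit.

def proc_submission(
    input_text: str,
    model_name: str,
    predrop_stopwords: bool = False,
    repetition_penalty: float = 0.5,
    no_repeat_ngram_size: int = 3,
    length_penalty: float = 1.5,
    token_batch_length: int = 1530,
    num_beams: int = 3,
    max_input_length: int = 8182,
) -> str:
    # Stub body for illustration; the real function batches, summarizes, and scores the text.
    return f"summarized {len(input_text)} characters with {model_name}"

# Callers that previously read these values from the (now commented-out) sliders
# can rely on the defaults, or still override any of them per call:
print(proc_submission("a long document ...", "placeholder-model-name"))
print(proc_submission("a long document ...", "placeholder-model-name", num_beams=4))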