Change some text and set truncation for summarization model
Browse files
app.py
CHANGED
|
@@ -51,7 +51,7 @@ def get_summarizer_model():
|
|
| 51 |
|
| 52 |
# Page setup
|
| 53 |
st.set_page_config(
|
| 54 |
-
page_title="Post-processing summarization fact checker",
|
| 55 |
page_icon="",
|
| 56 |
layout="centered",
|
| 57 |
initial_sidebar_state="auto",
|
|
@@ -68,7 +68,7 @@ def list_all_article_names() -> list:
|
|
| 68 |
for file in sorted(os.listdir('./sample-articles/')):
|
| 69 |
if file.endswith('.txt'):
|
| 70 |
filenames.append(file.replace('.txt', ''))
|
| 71 |
-
|
| 72 |
filenames.append("Provide your own input")
|
| 73 |
return filenames
|
| 74 |
|
|
@@ -101,7 +101,6 @@ def fetch_dependency_specific_contents(filename: str) -> AnyStr:
|
|
| 101 |
|
| 102 |
def fetch_dependency_svg(filename: str) -> AnyStr:
|
| 103 |
with open(f'./dependency-images/{filename.lower()}.txt', 'r') as f:
|
| 104 |
-
# data = f.read()
|
| 105 |
lines = [line.rstrip() for line in f]
|
| 106 |
return lines
|
| 107 |
|
|
@@ -113,9 +112,6 @@ def display_summary(summary_content: str):
|
|
| 113 |
|
| 114 |
|
| 115 |
def get_all_entities_per_sentence(text):
|
| 116 |
-
# load all NER models
|
| 117 |
-
# nlp = get_spacy()
|
| 118 |
-
# tagger = get_flair_tagger()
|
| 119 |
doc = nlp(text)
|
| 120 |
|
| 121 |
sentences = list(doc.sents)
|
|
@@ -128,7 +124,7 @@ def get_all_entities_per_sentence(text):
|
|
| 128 |
for entity in sentence.ents:
|
| 129 |
entities_this_sentence.append(str(entity))
|
| 130 |
|
| 131 |
-
# FLAIR ENTITIES
|
| 132 |
# sentence_entities = Sentence(str(sentence))
|
| 133 |
# tagger.predict(sentence_entities)
|
| 134 |
# for entity in sentence_entities.get_spans('ner'):
|
|
@@ -150,22 +146,17 @@ def get_all_entities(text):
|
|
| 150 |
|
| 151 |
|
| 152 |
def get_and_compare_entities():
|
| 153 |
-
# article_content = fetch_article_contents(article_name)
|
| 154 |
article_content = st.session_state.article_text
|
| 155 |
all_entities_per_sentence = get_all_entities_per_sentence(article_content)
|
| 156 |
-
# st.session_state.entities_per_sentence_article = all_entities_per_sentence
|
| 157 |
entities_article = list(itertools.chain.from_iterable(all_entities_per_sentence))
|
| 158 |
|
| 159 |
-
# summary_content = fetch_summary_contents(article_name)
|
| 160 |
summary_content = st.session_state.summary_output
|
| 161 |
all_entities_per_sentence = get_all_entities_per_sentence(summary_content)
|
| 162 |
-
# st.session_state.entities_per_sentence_summary = all_entities_per_sentence
|
| 163 |
entities_summary = list(itertools.chain.from_iterable(all_entities_per_sentence))
|
| 164 |
|
| 165 |
matched_entities = []
|
| 166 |
unmatched_entities = []
|
| 167 |
for entity in entities_summary:
|
| 168 |
-
# TODO: currently substring matching but probably should do embedding method or idk?
|
| 169 |
if any(entity.lower() in substring_entity.lower() for substring_entity in entities_article):
|
| 170 |
matched_entities.append(entity)
|
| 171 |
elif any(
|
|
@@ -179,7 +170,6 @@ def get_and_compare_entities():
|
|
| 179 |
|
| 180 |
|
| 181 |
def highlight_entities():
|
| 182 |
-
# summary_content = fetch_summary_contents(article_name)
|
| 183 |
summary_content = st.session_state.summary_output
|
| 184 |
markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
|
| 185 |
markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
|
|
@@ -206,11 +196,9 @@ def check_dependency(article: bool):
|
|
| 206 |
if article:
|
| 207 |
text = st.session_state.article_text
|
| 208 |
all_entities = get_all_entities_per_sentence(text)
|
| 209 |
-
# all_entities = st.session_state.entities_per_sentence_article
|
| 210 |
else:
|
| 211 |
text = st.session_state.summary_output
|
| 212 |
all_entities = get_all_entities_per_sentence(text)
|
| 213 |
-
# all_entities = st.session_state.entities_per_sentence_summary
|
| 214 |
doc = nlp(text)
|
| 215 |
tok_l = doc.to_json()['tokens']
|
| 216 |
test_list_dict_output = []
|
|
@@ -230,7 +218,6 @@ def check_dependency(article: bool):
|
|
| 230 |
continue
|
| 231 |
# ONE NEEDS TO BE ENTITY
|
| 232 |
if object_here in all_entities[i]:
|
| 233 |
-
# all_deps = all_deps.join(str(sentence))
|
| 234 |
identifier = object_here + t['dep'] + object_target
|
| 235 |
test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
|
| 236 |
"target_word_index": (t['head'] - sentence.start),
|
|
@@ -261,23 +248,24 @@ def generate_abstractive_summary(text, type, min_len=120, max_len=512, **kwargs)
|
|
| 261 |
if type == "top_p":
|
| 262 |
text = summarization_model(text, min_length=min_len,
|
| 263 |
max_length=max_len,
|
| 264 |
-
top_k=50, top_p=0.95, clean_up_tokenization_spaces=True)
|
| 265 |
elif type == "greedy":
|
| 266 |
text = summarization_model(text, min_length=min_len,
|
| 267 |
-
max_length=max_len, clean_up_tokenization_spaces=True)
|
| 268 |
elif type == "top_k":
|
| 269 |
text = summarization_model(text, min_length=min_len, max_length=max_len, top_k=50,
|
| 270 |
-
clean_up_tokenization_spaces=True)
|
| 271 |
elif type == "beam":
|
| 272 |
text = summarization_model(text, min_length=min_len,
|
| 273 |
max_length=max_len,
|
| 274 |
-
clean_up_tokenization_spaces=True, **kwargs)
|
| 275 |
summary = text[0]['summary_text'].replace("<n>", " ")
|
| 276 |
return summary
|
| 277 |
|
| 278 |
|
|
|
|
| 279 |
# Page
|
| 280 |
-
st.title('Summarization fact checker')
|
| 281 |
|
| 282 |
# INTRODUCTION
|
| 283 |
st.header("Introduction")
|
|
@@ -286,16 +274,14 @@ several different downstream NLP tasks. One such task is that of text summarizat
|
|
| 286 |
is to generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive
|
| 287 |
and abstractive. **Extractive summarization** merely copies informative fragments from the input,
|
| 288 |
whereas **abstractive summarization** may generate novel words. A good abstractive summary should cover principal
|
| 289 |
-
information in the input and has to be linguistically fluent. This blogpost will focus on this more difficult task of
|
| 290 |
abstractive summary generation.""")
|
| 291 |
|
| 292 |
st.markdown("""To generate summaries we will use the [PEGASUS](https://huggingface.co/google/pegasus-cnn_dailymail)
|
| 293 |
model, producing abstractive summaries from large articles. These summaries often contain sentences with different
|
| 294 |
-
kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to
|
| 295 |
-
the generated summaries.
|
| 296 |
-
|
| 297 |
-
results for some methods on specific examples. These text blocks will be indicated and they change according to the
|
| 298 |
-
currently selected article.""")
|
| 299 |
|
| 300 |
# Load all different models (cached) at start time of the huggingface space
|
| 301 |
sentence_embedding_model = get_sentence_embedding_model()
|
|
@@ -304,10 +290,11 @@ nlp = get_spacy()
|
|
| 304 |
summarization_model = get_summarizer_model()
|
| 305 |
|
| 306 |
# GENERATING SUMMARIES PART
|
| 307 |
-
st.header("Generating summaries")
|
| 308 |
st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
|
| 309 |
-
"text yourself. Note that it’s suggested to provide a sufficiently large
|
| 310 |
-
"generated from it might not be optimal, leading to suboptimal performance of the post-processing
|
|
|
|
| 311 |
|
| 312 |
selected_article = st.selectbox('Select an article or provide your own:',
|
| 313 |
list_all_article_names())
|
|
@@ -319,19 +306,18 @@ article_text = st.text_area(
|
|
| 319 |
)
|
| 320 |
|
| 321 |
summarize_button = st.button(label='Process article content',
|
| 322 |
-
help="
|
| 323 |
|
| 324 |
if summarize_button:
|
| 325 |
st.session_state.article_text = article_text
|
| 326 |
st.markdown(
|
| 327 |
-
"Below you can find the generated summary for the article.
|
| 328 |
-
"
|
| 329 |
"factual a summary is for a given article. The idea is that in production, you could generate a set of "
|
| 330 |
"summaries for the same article, with different parameters (or even different models). By using "
|
| 331 |
"post-processing error detection, we can then select the best possible summary.")
|
| 332 |
if st.session_state.article_text:
|
| 333 |
-
with st.spinner('Generating summary...'):
|
| 334 |
-
# classify_comment(article_text, selected_model)
|
| 335 |
if selected_article != "Provide your own input" and article_text == fetch_article_contents(
|
| 336 |
selected_article):
|
| 337 |
st.session_state.unchanged_text = True
|
|
@@ -367,17 +353,15 @@ if summarize_button:
|
|
| 367 |
|
| 368 |
markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
|
| 369 |
markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
|
| 370 |
-
st.markdown(
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
"explanation of the results below.",
|
| 380 |
-
unsafe_allow_html=True)
|
| 381 |
if st.session_state.unchanged_text:
|
| 382 |
entity_specific_text = fetch_entity_specific_contents(selected_article)
|
| 383 |
soup = BeautifulSoup(entity_specific_text, features="html.parser")
|
|
@@ -397,21 +381,17 @@ if summarize_button:
|
|
| 397 |
st.markdown("Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
|
| 398 |
"husband…”, there would be a dependency in the summary that is non-existent in the article itself (namely "
|
| 399 |
"“Jan” is the “poss” of “husband”). However, often new dependencies are introduced in the summary that "
|
| 400 |
-
"are still correct. “The borders of Ukraine” have a different dependency between “borders” and
|
|
|
|
| 401 |
"than “Ukraine’s borders”, while both descriptions have the same meaning. So just matching all "
|
| 402 |
"dependencies between article and summary (as we did with entity matching) would not be a robust method.")
|
| 403 |
-
st.markdown(
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
"dependencies between an existing **entity** and its direct connections. Below we highlight all unmatched "
|
| 410 |
-
"dependencies that satisfy the discussed constraints. We also discuss the specific results for the "
|
| 411 |
-
"currently selected article.")
|
| 412 |
with st.spinner("Doing dependency parsing..."):
|
| 413 |
-
# TODO RIGHT IF FUNCTION (IF EXAMPLE AND IF INPUT UNCHANGED)
|
| 414 |
-
# if selected_article == 'article11':
|
| 415 |
if st.session_state.unchanged_text:
|
| 416 |
for cur_svg_image in fetch_dependency_svg(selected_article):
|
| 417 |
st.write(cur_svg_image, unsafe_allow_html=True)
|
|
@@ -431,15 +411,13 @@ if summarize_button:
|
|
| 431 |
|
| 432 |
# OUTRO/CONCLUSION
|
| 433 |
st.header("Wrapping up")
|
| 434 |
-
st.markdown(
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
"definitely not sufficiently robust for general use-cases. (something about that we tested also RE and "
|
| 440 |
-
"maybe other things).")
|
| 441 |
st.markdown("####")
|
| 442 |
-
st.markdown("Below we generated 5 different kinds of summaries from the article in which their ranks are estimated, "
|
| 443 |
"and hopefully the best summary (read: the one that a human would prefer or indicate as the best one) "
|
| 444 |
"will be at the top. TODO: implement this (at the end I think) and also put something in the text with "
|
| 445 |
"the actual parameters or something? ")
|
|
|
|
| 51 |
|
| 52 |
# Page setup
|
| 53 |
st.set_page_config(
|
| 54 |
+
page_title="📜 Post-processing summarization fact checker 📜",
|
| 55 |
page_icon="",
|
| 56 |
layout="centered",
|
| 57 |
initial_sidebar_state="auto",
|
|
|
|
| 68 |
for file in sorted(os.listdir('./sample-articles/')):
|
| 69 |
if file.endswith('.txt'):
|
| 70 |
filenames.append(file.replace('.txt', ''))
|
| 71 |
+
# Append free use possibility:
|
| 72 |
filenames.append("Provide your own input")
|
| 73 |
return filenames
|
| 74 |
|
|
|
|
| 101 |
|
| 102 |
def fetch_dependency_svg(filename: str) -> AnyStr:
|
| 103 |
with open(f'./dependency-images/{filename.lower()}.txt', 'r') as f:
|
|
|
|
| 104 |
lines = [line.rstrip() for line in f]
|
| 105 |
return lines
|
| 106 |
|
|
|
|
| 112 |
|
| 113 |
|
| 114 |
def get_all_entities_per_sentence(text):
|
|
|
|
|
|
|
|
|
|
| 115 |
doc = nlp(text)
|
| 116 |
|
| 117 |
sentences = list(doc.sents)
|
|
|
|
| 124 |
for entity in sentence.ents:
|
| 125 |
entities_this_sentence.append(str(entity))
|
| 126 |
|
| 127 |
+
# FLAIR ENTITIES (CURRENTLY NOT USED)
|
| 128 |
# sentence_entities = Sentence(str(sentence))
|
| 129 |
# tagger.predict(sentence_entities)
|
| 130 |
# for entity in sentence_entities.get_spans('ner'):
|
|
|
|
| 146 |
|
| 147 |
|
| 148 |
def get_and_compare_entities():
|
|
|
|
| 149 |
article_content = st.session_state.article_text
|
| 150 |
all_entities_per_sentence = get_all_entities_per_sentence(article_content)
|
|
|
|
| 151 |
entities_article = list(itertools.chain.from_iterable(all_entities_per_sentence))
|
| 152 |
|
|
|
|
| 153 |
summary_content = st.session_state.summary_output
|
| 154 |
all_entities_per_sentence = get_all_entities_per_sentence(summary_content)
|
|
|
|
| 155 |
entities_summary = list(itertools.chain.from_iterable(all_entities_per_sentence))
|
| 156 |
|
| 157 |
matched_entities = []
|
| 158 |
unmatched_entities = []
|
| 159 |
for entity in entities_summary:
|
|
|
|
| 160 |
if any(entity.lower() in substring_entity.lower() for substring_entity in entities_article):
|
| 161 |
matched_entities.append(entity)
|
| 162 |
elif any(
|
|
|
|
| 170 |
|
| 171 |
|
| 172 |
def highlight_entities():
|
|
|
|
| 173 |
summary_content = st.session_state.summary_output
|
| 174 |
markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
|
| 175 |
markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
|
|
|
|
| 196 |
if article:
|
| 197 |
text = st.session_state.article_text
|
| 198 |
all_entities = get_all_entities_per_sentence(text)
|
|
|
|
| 199 |
else:
|
| 200 |
text = st.session_state.summary_output
|
| 201 |
all_entities = get_all_entities_per_sentence(text)
|
|
|
|
| 202 |
doc = nlp(text)
|
| 203 |
tok_l = doc.to_json()['tokens']
|
| 204 |
test_list_dict_output = []
|
|
|
|
| 218 |
continue
|
| 219 |
# ONE NEEDS TO BE ENTITY
|
| 220 |
if object_here in all_entities[i]:
|
|
|
|
| 221 |
identifier = object_here + t['dep'] + object_target
|
| 222 |
test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
|
| 223 |
"target_word_index": (t['head'] - sentence.start),
|
|
|
|
| 248 |
if type == "top_p":
|
| 249 |
text = summarization_model(text, min_length=min_len,
|
| 250 |
max_length=max_len,
|
| 251 |
+
top_k=50, top_p=0.95, clean_up_tokenization_spaces=True, truncation=True, **kwargs)
|
| 252 |
elif type == "greedy":
|
| 253 |
text = summarization_model(text, min_length=min_len,
|
| 254 |
+
max_length=max_len, clean_up_tokenization_spaces=True, truncation=True, **kwargs)
|
| 255 |
elif type == "top_k":
|
| 256 |
text = summarization_model(text, min_length=min_len, max_length=max_len, top_k=50,
|
| 257 |
+
clean_up_tokenization_spaces=True, truncation=True, **kwargs)
|
| 258 |
elif type == "beam":
|
| 259 |
text = summarization_model(text, min_length=min_len,
|
| 260 |
max_length=max_len,
|
| 261 |
+
clean_up_tokenization_spaces=True, truncation=True, **kwargs)
|
| 262 |
summary = text[0]['summary_text'].replace("<n>", " ")
|
| 263 |
return summary
|
| 264 |
|
| 265 |
|
| 266 |
+
|
| 267 |
# Page
|
| 268 |
+
st.title('📜 Summarization fact checker 📜')
|
| 269 |
|
| 270 |
# INTRODUCTION
|
| 271 |
st.header("Introduction")
|
|
|
|
| 274 |
is to generate concise and accurate summaries from input document(s). There are 2 types of summarization: extractive
|
| 275 |
and abstractive. **Extractive summarization** merely copies informative fragments from the input,
|
| 276 |
whereas **abstractive summarization** may generate novel words. A good abstractive summary should cover principal
|
| 277 |
+
information in the input and has to be linguistically fluent. This interactive blogpost will focus on this more difficult task of
|
| 278 |
abstractive summary generation.""")
|
| 279 |
|
| 280 |
st.markdown("""To generate summaries we will use the [PEGASUS](https://huggingface.co/google/pegasus-cnn_dailymail)
|
| 281 |
model, producing abstractive summaries from large articles. These summaries often contain sentences with different
|
| 282 |
+
kinds of errors. Rather than improving the core model, we will look into possible post-processing steps to detect errors
|
| 283 |
+
from the generated summaries. Throughout this blog, we will also explain the results for some methods on specific
|
| 284 |
+
examples. These text blocks will be indicated and they change according to the currently selected article.""")
|
|
|
|
|
|
|
| 285 |
|
| 286 |
# Load all different models (cached) at start time of the huggingface space
|
| 287 |
sentence_embedding_model = get_sentence_embedding_model()
|
|
|
|
| 290 |
summarization_model = get_summarizer_model()
|
| 291 |
|
| 292 |
# GENERATING SUMMARIES PART
|
| 293 |
+
st.header("🪶 Generating summaries")
|
| 294 |
st.markdown("Let’s start by selecting an article text for which we want to generate a summary, or you can provide "
|
| 295 |
+
"text yourself. Note that it’s suggested to provide a sufficiently large article, as otherwise the "
|
| 296 |
+
"summary generated from it might not be optimal, leading to suboptimal performance of the post-processing "
|
| 297 |
+
"steps. However, too long articles will be truncated and might miss information in the summary.")
|
| 298 |
|
| 299 |
selected_article = st.selectbox('Select an article or provide your own:',
|
| 300 |
list_all_article_names())
|
|
|
|
| 306 |
)
|
| 307 |
|
| 308 |
summarize_button = st.button(label='Process article content',
|
| 309 |
+
help="Start interactive blogpost")
|
| 310 |
|
| 311 |
if summarize_button:
|
| 312 |
st.session_state.article_text = article_text
|
| 313 |
st.markdown(
|
| 314 |
+
"Below you can find the generated summary for the article. We will discuss two approaches that we found are "
|
| 315 |
+
"able to detect some common errors. Based on errors, one could then score different summaries, indicating how "
|
| 316 |
"factual a summary is for a given article. The idea is that in production, you could generate a set of "
|
| 317 |
"summaries for the same article, with different parameters (or even different models). By using "
|
| 318 |
"post-processing error detection, we can then select the best possible summary.")
|
| 319 |
if st.session_state.article_text:
|
| 320 |
+
with st.spinner('Generating summary, this might take a while...'):
|
|
|
|
| 321 |
if selected_article != "Provide your own input" and article_text == fetch_article_contents(
|
| 322 |
selected_article):
|
| 323 |
st.session_state.unchanged_text = True
|
|
|
|
| 353 |
|
| 354 |
markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
|
| 355 |
markdown_start_green = "<mark class=\"entity\" style=\"background: rgb(121, 236, 121);\">"
|
| 356 |
+
st.markdown(
|
| 357 |
+
"We call this technique “entity matching” and here you can see what this looks like when we apply this "
|
| 358 |
+
"method on the summary. Entities in the summary are marked " + green_text + " when the entity also "
|
| 359 |
+
"exists in the article, "
|
| 360 |
+
"while unmatched entities "
|
| 361 |
+
"are marked " + red_text +
|
| 362 |
+
". Several of the example articles and their summaries indicate different errors we find by using this "
|
| 363 |
+
"technique. Based on the current article, we provide a short explanation of the results below **(only for "
|
| 364 |
+
"example articles)**. ", unsafe_allow_html=True)
|
|
|
|
|
|
|
| 365 |
if st.session_state.unchanged_text:
|
| 366 |
entity_specific_text = fetch_entity_specific_contents(selected_article)
|
| 367 |
soup = BeautifulSoup(entity_specific_text, features="html.parser")
|
|
|
|
| 381 |
st.markdown("Here, “Jan” is the “poss” (possession modifier) of “wife”. If suddenly the summary would read “Jan’s "
|
| 382 |
"husband…”, there would be a dependency in the summary that is non-existent in the article itself (namely "
|
| 383 |
"“Jan” is the “poss” of “husband”). However, often new dependencies are introduced in the summary that "
|
| 384 |
+
"are still correct. “The borders of Ukraine” have a different dependency between “borders” and "
|
| 385 |
+
"“Ukraine” "
|
| 386 |
"than “Ukraine’s borders”, while both descriptions have the same meaning. So just matching all "
|
| 387 |
"dependencies between article and summary (as we did with entity matching) would not be a robust method.")
|
| 388 |
+
st.markdown("However, we have found that there are specific dependencies that, when unmatched, are often an "
|
| 389 |
+
"indication of a wrongly constructed sentence. We found 2(/3 TODO) common dependencies which, "
|
| 390 |
+
"when present in the summary but not in the article, are highly indicative of factualness errors. "
|
| 391 |
+
"Furthermore, we only check dependencies between an existing **entity** and its direct connections. "
|
| 392 |
+
"Below we highlight all unmatched dependencies that satisfy the discussed constraints. We also "
|
| 393 |
+
"discuss the specific results for the currently selected example article.")
|
|
|
|
|
|
|
|
|
|
| 394 |
with st.spinner("Doing dependency parsing..."):
|
|
|
|
|
|
|
| 395 |
if st.session_state.unchanged_text:
|
| 396 |
for cur_svg_image in fetch_dependency_svg(selected_article):
|
| 397 |
st.write(cur_svg_image, unsafe_allow_html=True)
|
|
|
|
| 411 |
|
| 412 |
# OUTRO/CONCLUSION
|
| 413 |
st.header("Wrapping up")
|
| 414 |
+
st.markdown("We have presented 2 methods that try to detect errors in summaries via post-processing steps. Entity "
|
| 415 |
+
"matching can be used to solve hallucinations, while dependency comparison can be used to filter out "
|
| 416 |
+
"some bad sentences (and thus worse summaries). These methods highlight the possibilities of "
|
| 417 |
+
"post-processing AI-made summaries, but are only a first introduction. As the methods were "
|
| 418 |
+
"empirically tested they are definitely not sufficiently robust for general use-cases.")
|
|
|
|
|
|
|
| 419 |
st.markdown("####")
|
| 420 |
+
st.markdown("(TODO) Below we generated 5 different kinds of summaries from the article in which their ranks are estimated, "
|
| 421 |
"and hopefully the best summary (read: the one that a human would prefer or indicate as the best one) "
|
| 422 |
"will be at the top. TODO: implement this (at the end I think) and also put something in the text with "
|
| 423 |
"the actual parameters or something? ")
|