Spaces:
Sleeping
Sleeping
| from pandas import DataFrame | |
| from src.application.config import WORD_BREAK | |
| from src.application.formatting import ( | |
| color_text, | |
| format_entity_count, | |
| ) | |
| from src.application.image.helper import encode_image | |
| from src.application.image.image import ImageDetector | |
| from src.application.text.entity import apply_highlight | |
| from src.application.text.helper import ( | |
| extract_equal_text, | |
| replace_leading_spaces, | |
| ) | |
| from src.application.text.text import TextDetector | |
def create_fact_checker_table(
    aligned_sentences_df: DataFrame,
    text: TextDetector,
    image: ImageDetector,
):
    """
    Build the HTML comparison table between input news and source news.

    An image row is emitted first when ``image.input`` is set. When
    ``text.input`` is set, one entry per aligned sentence pair is appended to
    ``text.fact_checker_table`` (pairs whose ``input`` is None are skipped),
    and every entry of that list is then rendered as a table row. Consecutive
    entries sharing the same URL form a group: the first row of a group
    carries the "Forensic" and "Originality" cells with a ``rowspan``
    covering the whole group.

    Args:
        aligned_sentences_df: Frame read through its ``input``, ``source``,
            ``entities`` and ``url`` columns.
        text: Text detection state. Its ``fact_checker_table`` list is
            extended in place.
        image: Image detection state.

    Returns:
        An HTML string holding a heading followed by the table.
    """
    rows = []

    if image.input is not None:
        rows.append(format_image_fact_checker_row(image))

    if text.input is not None:
        for _, row in aligned_sentences_df.iterrows():
            if row["input"] is None:
                continue

            if row["source"] is None:
                # Two distinct lists: the input and source sides must not
                # share one list object.
                equal_idx_1, equal_idx_2 = [], []
            else:
                # Indices of the phrases that are equal in input and source.
                equal_idx_1, equal_idx_2 = extract_equal_text(
                    row["input"],
                    row["source"],
                )

            text.fact_checker_table.append(
                [
                    row,  # aligned sentence pair
                    equal_idx_1,  # index of equal text in input
                    equal_idx_2,  # index of equal text in source
                    row["entities"],
                    row["url"],
                ],
            )

        table_entries = text.fact_checker_table
        previous_url = None
        # Number of rows left in the current URL group, current row included.
        # It reaches 1 on the last row of a group, so it is already at the
        # right starting value when the next group begins.
        span_row = 1
        for index, row in enumerate(table_entries):
            current_url = row[4]

            if index == 0 or current_url != previous_url:
                # A new group starts here: look ahead to count its rows.
                first_url_row = True
                previous_url = current_url
                while (
                    index + span_row < len(table_entries)
                    and table_entries[index + span_row][4] == current_url
                ):
                    span_row += 1
            else:
                first_url_row = False
                span_row -= 1

            last_url_row = span_row == 1

            rows.append(
                format_text_fact_checker_row(
                    text,
                    row,
                    first_url_row,
                    last_url_row,
                    span_row,
                ),
            )

    table = "\n".join(rows)
    # No trailing <style> tag: an unclosed one would turn any markup placed
    # after this fragment into raw stylesheet text.
    return f"""
    <h5>Comparison between input news and source news:</h5>
    <table border="1" style="width:100%; text-align:left;">
        <col style="width: 170px;">
        <col style="width: 170px;">
        <col style="width: 30px;">
        <col style="width: 75px;">
        <thead>
            <tr>
                <th>Input news</th>
                <th>Source (URL in Originality)</th>
                <th>Forensic</th>
                <th>Originality</th>
            </tr>
        </thead>
        <tbody>
            {table}
        </tbody>
    </table>
    """
def format_text_fact_checker_row(
    text: TextDetector,
    row: list,
    first_url_row: bool = True,
    last_url_row: bool = True,
    span_row: int = 1,
):
    """
    Render one aligned sentence pair as an HTML ``<tr>``.

    Args:
        text: Text detection state. ``grouped_url_df`` supplies the label and
            score for the row's URL; ``prediction_label[0]`` and
            ``prediction_score[0]`` are used when no entry matches.
        row: ``[aligned_row, equal_idx_input, equal_idx_source, entities,
            url]``. The URL shown is read from ``aligned_row["url"]``.
        first_url_row: True when this is the first row of its URL group. Only
            such rows carry the label/score and URL cells.
        last_url_row: True when this is the last row of its URL group.
        span_row: ``rowspan`` applied to the label/score and URL cells.

    Returns:
        The HTML for the row, or ``""`` when the input sentence is None.
    """
    aligned = row[0]
    if aligned["input"] is None:
        return ""

    entities = row[3]
    entity_count = 0

    if aligned["source"] is not None:
        if entities is not None:
            # Highlight entities on both sides.
            input_sentence, highlight_idx_input = apply_highlight(
                aligned["input"],
                entities,
                "input",
            )
            source_sentence, highlight_idx_source = apply_highlight(
                aligned["source"],
                entities,
                "source",
            )
            entity_count = len(entities)
        else:
            input_sentence = aligned["input"]
            source_sentence = aligned["source"]
            highlight_idx_input = []
            highlight_idx_source = []

        # Color the words that overlap between input and source.
        input_sentence = color_text(
            input_sentence,
            row[1],
            highlight_idx_input,
        )
        source_sentence = color_text(
            source_sentence,
            row[2],
            highlight_idx_source,
        )

        # The markup carries "_" in place of spaces so that word counting
        # stays correct; restore the spaces now that coloring is done.
        input_sentence = input_sentence.replace(
            "span_style",
            "span style",
        ).replace("1px_4px", "1px 4px")
        source_sentence = source_sentence.replace(
            "span_style",
            "span style",
        ).replace("1px_4px", "1px 4px")
    else:
        input_sentence = aligned["input"]
        source_sentence = aligned["source"]

    input_sentence = replace_leading_spaces(input_sentence)
    source_sentence = replace_leading_spaces(source_sentence)

    url = aligned["url"]

    # Label and score are looked up per URL, falling back to the overall
    # prediction when the URL has no entry.
    filterby_url = text.grouped_url_df[text.grouped_url_df["url"] == url]
    if len(filterby_url) > 0:
        label = filterby_url["label"].values[0]
        score = filterby_url["score"].values[0]
    else:
        label = text.prediction_label[0]
        score = text.prediction_score[0]

    if url is None:
        source_text_url = url
    else:
        source_text_url = f"""<a href="{url}">{url}</a>"""

    entity_count_text = format_entity_count(entity_count)

    # Transparent borders hide the separators between rows of one URL group.
    border_top = "border-top: 1px solid transparent;"
    border_bottom = "border-bottom: 1px solid transparent;"

    if first_url_row and last_url_row:
        # Single-row group: keep both borders visible.
        return f"""
            <tr>
                <td>{input_sentence}</td>
                <td>{source_sentence}</td>
                <td rowspan="{span_row}">{label}<br>
                ({score * 100:.2f}%)<br><br>
                {entity_count_text}</td>
                <td rowspan="{span_row}"; style="{WORD_BREAK}";>{source_text_url}</td>
            </tr>
            """

    if first_url_row:
        # First row of a longer group: transparent bottom border.
        return f"""
            <tr>
                <td style="{border_bottom}";>{input_sentence}</td>
                <td style="{border_bottom}";>{source_sentence}</td>
                <td rowspan="{span_row}">{label}<br>
                ({score * 100:.2f}%)<br><br>
                {entity_count_text}</td>
                <td rowspan="{span_row}"; style="{WORD_BREAK}";>{source_text_url}</td>
            </tr>
            """

    if last_url_row:
        # Last row of a group: transparent top border.
        return f"""
            <tr>
                <td style="{border_top}";>{input_sentence}</td>
                <td style="{border_top}";>{source_sentence}</td>
            </tr>
            """

    # Middle row of a group: transparent top and bottom borders.
    return f"""
        <tr>
            <td style="{border_top} {border_bottom}";>{input_sentence}</td>
            <td style="{border_top} {border_bottom}";>{source_sentence}</td>
        </tr>
        """
def format_image_fact_checker_row(image: ImageDetector):
    """
    Render the image comparison as an HTML ``<tr>``.

    The input image is shown as a link when ``image.input`` contains
    ``"http"``, otherwise as an inline base64 ``<img>``. The source image and
    its link are shown when ``image.referent_url`` is set; otherwise the
    source cell reads "Image not found" and the URL cell is empty.

    Args:
        image: Image detection state; ``input``, ``referent_url``,
            ``prediction_label`` and ``prediction_score`` are read.

    Returns:
        The HTML for the row, or ``""`` when ``image.input`` is None.
    """
    if image.input is None:
        return ""

    # Built regardless of the referent URL, so the row can still be rendered
    # when no source image was found.
    if "http" in image.input:
        input_image = f"""<a href="{image.input}">{image.input}</a>"""
    else:
        base64_image = encode_image(image.input)
        input_image = f"""<img src="data:image/jpeg;base64,{base64_image}" width="100" height="150">"""  # noqa: E501

    # Both conditions must hold: joined with "or" the test is always true and
    # a missing URL would be rendered as a link to "None".
    if image.referent_url is not None and image.referent_url != "":
        source_image_url = f"""<a href="{image.referent_url}">{image.referent_url}</a>"""  # noqa: E501
        source_image = f"""<img src="{image.referent_url}" width="100" height="150">"""  # noqa: E501
    else:
        source_image = "Image not found"
        source_image_url = ""

    # NOTE(review): the score is printed as-is followed by "%"; this assumes
    # prediction_score is already a percentage - confirm against the caller.
    return f"""
        <tr>
            <td>{input_image}</td>
            <td>{source_image}</td>
            <td>{image.prediction_label}<br>({image.prediction_score:.2f}%)</td>
            <td style="{WORD_BREAK}";>{source_image_url}</td>
        </tr>
        """