|
import gradio as gr |
|
import re |
|
import markdown |
|
|
|
|
|
|
|
|
|
|
|
# Headline shapes in which the source name appears inside the link text and
# again in parentheses after the link. Every pattern captures, in order:
# (1) the headline, (2) the source inside the link text, (3) the link target,
# (4) the parenthesised source that follows the link.
# Both http:// and https:// targets are accepted. The patterns are compiled
# once at import time rather than on every call.
_DUPLICATE_SOURCE_PATTERNS = (
    # [Headline - Source](url) (Source)
    re.compile(r'\[([^\]]+?) - ([^\(]+)\]\((https?://[^\)]+?)\) \(([^)]+)\)'),
    # [Headline | Source](url) (Source)
    re.compile(r'\[([^\]]+?) \| ([^\(]+)\]\((https?://[^\)]+?)\) \(([^)]+)\)'),
    # [Headline (Source)](url) (Source)
    re.compile(r'\[([^\]]+?) \(([^\)]+)\)\]\((https?://[^\)]+?)\) \(([^)]+)\)'),
)


def process_markdown(text):
    """Remove the source name duplicated inside markdown headline links.

    Links of the form ``[Headline - Source](url) (Source)``,
    ``[Headline | Source](url) (Source)`` or
    ``[Headline (Source)](url) (Source)`` are rewritten to
    ``[Headline](url) (Source)``. Straight apostrophes in the kept headline
    are replaced with the typographic apostrophe (U+2019). Text that matches
    none of the patterns is returned unchanged.

    Args:
        text: Markdown text to clean.

    Returns:
        The markdown text with every matching link rewritten.
    """

    def process_line(match):
        """Build the replacement link for a single pattern match."""
        # Group 2 (the source inside the link text) is dropped on purpose:
        # the same information is kept from the trailing parentheses.
        title = match.group(1).replace("'", "’")
        url = match.group(3)
        second_mention_source = match.group(4)
        return f'[{title}]({url}) ({second_mention_source})'

    # The patterns are applied one after another to the whole text, so a
    # document may mix all three headline shapes.
    for pattern in _DUPLICATE_SOURCE_PATTERNS:
        text = pattern.sub(process_line, text)

    return text
|
|
|
def convert_and_process(html_text):
    """Clean duplicated source mentions, then render the markdown as HTML.

    Args:
        html_text: The text to process. Despite the parameter name, it is
            handled as markdown: it is first passed to ``process_markdown``
            and the result is given to ``markdown.markdown``.

    Returns:
        The HTML string returned by ``markdown.markdown``.
    """
    return markdown.markdown(process_markdown(html_text))
|
|
|
|
|
# Texts displayed at the top of the Gradio page.
APP_TITLE = "Removing duplicated sources from Meltwater headlines"
APP_DESCRIPTION = "1. Convert your text copied from Meltwater (with hyperlinks) into Markdown here: https://euangoddard.github.io/clipboard2markdown/\n\n 2. Paste your markdown text here to clean up the double source mentions, and submit to display the output as HTML."

# Input and output components: a textbox for the pasted markdown and an HTML
# pane that displays the value returned by convert_and_process.
markdown_input = gr.Textbox(label="Paste your markdown text here ⤵️")
html_result = gr.HTML(label="Processed HTML Output")

# Wire the conversion function to the components.
iface = gr.Interface(
    fn=convert_and_process,
    inputs=markdown_input,
    outputs=html_result,
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Start the app; share=True is passed through to Gradio's launch().
iface.launch(share=True)