nelsonjq committed on
Commit
4362da8
·
verified ·
1 Parent(s): 8306236

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ import markdown # Import markdown library to convert markdown to HTML
4
+
5
+ #def html_to_markdown(html_text):
6
+ # Convert HTML to Markdown
7
+ # return html2text.html2text(html_text)
8
+
9
def process_markdown(text):
    """Remove the source name embedded in Markdown link labels.

    Recognised link shapes (the source is repeated after the link)::

        [Title - Source](url) (Source)
        [Title | Source](url) (Source)
        [Title (Source)](url) (Source)

    Each is rewritten to ``[Title](url) (Source)``. The source embedded in
    the label is dropped without being compared to the trailing one.
    Straight apostrophes in the kept title become the typographic
    apostrophe (U+2019). Text that matches none of the shapes is returned
    untouched.

    Args:
        text: Markdown text, possibly containing many links.

    Returns:
        The text with every recognised link rewritten.
    """
    # One combined pattern, applied in a single pass, so a link that has
    # already been rewritten is never stripped a second time by another
    # label form (e.g. a title that legitimately ends in "(2024)").
    # Alternatives are tried in the listed order for each link.
    link_pattern = re.compile(
        r'\[(?:'
        # Greedy title: split at the LAST " - " / " | " so headlines that
        # contain the separator themselves are kept whole. The embedded
        # source may not contain "]" so it cannot run into a later link.
        r'(?P<dash>[^\]]+) - [^\]\(]+'
        r'|(?P<pipe>[^\]]+) \| [^\]\(]+'
        r'|(?P<paren>[^\]]+?) \([^\]\)]+\)'
        r')\]'
        # Accept both http and https URLs.
        r'\((?P<url>https?://[^\)]+?)\)'
        r' \((?P<source>[^)]+)\)'
    )

    def process_line(match):
        # Exactly one of the title groups took part in the match.
        title = (
            match.group('dash')
            or match.group('pipe')
            or match.group('paren')
        )
        # Swap straight apostrophes for the typographic apostrophe.
        title = title.replace("'", "’")
        return f"[{title}]({match.group('url')}) ({match.group('source')})"

    return link_pattern.sub(process_line, text)
33
+
34
def convert_and_process(html_text):
    """Clean the pasted Markdown and render the result as HTML.

    Despite the parameter name, the input is treated as Markdown: it is
    passed through ``process_markdown`` and the cleaned text is then
    converted to HTML with ``markdown.markdown``.

    Args:
        html_text: Markdown text submitted through the interface.

    Returns:
        The HTML produced from the cleaned Markdown.
    """
    cleaned = process_markdown(html_text)
    return markdown.markdown(cleaned)
42
+
43
# Texts shown at the top of the Gradio page.
APP_TITLE = "Removing duplicated sources from Meltwater headlines"
APP_DESCRIPTION = (
    "1. Convert your text copied from Meltwater (with hyperlinks) into "
    "Markdown here: https://euangoddard.github.io/clipboard2markdown/\n\n"
    " 2. Paste your markdown text here to clean up the double source "
    "mentions, and submit to display the output as HTML."
)

# Wire the converter into a one-textbox-in, HTML-out Gradio interface.
iface = gr.Interface(
    fn=convert_and_process,
    inputs=gr.Textbox(label="Paste your markdown text here ⤵️"),
    outputs=gr.HTML(label="Processed HTML Output"),  # rendered as HTML
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

# Start the app; share=True asks Gradio for a public share link as well.
iface.launch(share=True)