Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,7 +20,7 @@ def summarize_article(url, min_len, max_len):
|
|
| 20 |
soup = BeautifulSoup(r.text, 'html.parser')
|
| 21 |
|
| 22 |
# Find all the <h1> (header) and <p> (paragraph) elements in the HTML content
|
| 23 |
-
results = soup.find_all(['h1','p'])
|
| 24 |
|
| 25 |
# Extract the text content from each element and store it in a list called text
|
| 26 |
text = [result.text for result in results]
|
|
@@ -29,7 +29,6 @@ def summarize_article(url, min_len, max_len):
|
|
| 29 |
ARTICLE = ' '.join(text)
|
| 30 |
|
| 31 |
# Append a special token (<eos>) after sentence-ending punctuation. This helps split the article into smaller chunks for summarization.
|
| 32 |
-
ARTICLE = ARTICLE.replace('\n', '')
|
| 33 |
ARTICLE = ARTICLE.replace('.', '.<eos>')
|
| 34 |
ARTICLE = ARTICLE.replace('?', '?<eos>')
|
| 35 |
ARTICLE = ARTICLE.replace('!', '!<eos>')
|
|
|
|
| 20 |
soup = BeautifulSoup(r.text, 'html.parser')
|
| 21 |
|
| 22 |
# Find all the <h1> and <h2> (header) and <p> (paragraph) elements in the HTML content
|
| 23 |
+
results = soup.find_all(['h1','h2','p'])
|
| 24 |
|
| 25 |
# Extract the text content from each element and store it in a list called text
|
| 26 |
text = [result.text for result in results]
|
|
|
|
| 29 |
ARTICLE = ' '.join(text)
|
| 30 |
|
| 31 |
# Append a special token (<eos>) after sentence-ending punctuation. This helps split the article into smaller chunks for summarization.
|
|
|
|
| 32 |
ARTICLE = ARTICLE.replace('.', '.<eos>')
|
| 33 |
ARTICLE = ARTICLE.replace('?', '?<eos>')
|
| 34 |
ARTICLE = ARTICLE.replace('!', '!<eos>')
|