Update app.py
Browse files
app.py
CHANGED
|
@@ -21,7 +21,7 @@ import os
|
|
| 21 |
from dotenv import load_dotenv
|
| 22 |
import certifi
|
| 23 |
from bs4 import BeautifulSoup
|
| 24 |
-
|
| 25 |
|
| 26 |
# Load environment variables from a .env file
|
| 27 |
load_dotenv()
|
|
@@ -90,10 +90,6 @@ def scrape_with_bs4(url, session, max_chars=None):
|
|
| 90 |
logger.error(f"Error scraping {url} with BeautifulSoup: {e}")
|
| 91 |
return ""
|
| 92 |
|
| 93 |
-
from bs4 import BeautifulSoup
|
| 94 |
-
from trafilatura import extract
|
| 95 |
-
import requests
|
| 96 |
-
|
| 97 |
def scrape_with_trafilatura(url, max_chars=None, timeout=5, use_beautifulsoup=False):
|
| 98 |
try:
|
| 99 |
response = requests.get(url, timeout=timeout)
|
|
|
|
| 21 |
from dotenv import load_dotenv
|
| 22 |
import certifi
|
| 23 |
from bs4 import BeautifulSoup
|
| 24 |
+
import requests
|
| 25 |
|
| 26 |
# Load environment variables from a .env file
|
| 27 |
load_dotenv()
|
|
|
|
| 90 |
logger.error(f"Error scraping {url} with BeautifulSoup: {e}")
|
| 91 |
return ""
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def scrape_with_trafilatura(url, max_chars=None, timeout=5, use_beautifulsoup=False):
|
| 94 |
try:
|
| 95 |
response = requests.get(url, timeout=timeout)
|