Spaces:
Running
Running
File size: 572 Bytes
7a70225 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
# Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
class WebScraper:
    """Fetch a list of web pages and collect their paragraph text.

    Each scraped page becomes one row of ``self.data`` with two columns:
    ``url`` (the address that was requested) and ``content`` (the text of
    every ``<p>`` element on the page, joined with single spaces).
    """

    def __init__(self, urls, *, timeout=30.0):
        """Store the URLs to scrape and start with an empty result table.

        Args:
            urls: Iterable of URL strings to fetch when ``scrape`` is called.
            timeout: Seconds to wait for each HTTP request before giving up.
                Pass ``None`` to wait indefinitely.
        """
        self.urls = urls
        self.timeout = timeout
        self.data = pd.DataFrame()

    def scrape(self) -> None:
        """Download every URL and add one row per page to ``self.data``.

        Rows are added to whatever ``self.data`` already holds, so calling
        this method repeatedly accumulates results.

        Raises:
            requests.RequestException: If a request fails or times out.
        """
        # Gather plain dicts first and build the frame once at the end:
        # DataFrame.append was removed in pandas 2.0, and growing a frame
        # row by row copies all existing rows on every iteration.
        rows = []
        for url in self.urls:
            # Without a timeout a single unresponsive host would block the
            # whole scrape forever.
            response = requests.get(url, timeout=self.timeout)
            # NOTE(review): the HTTP status is not checked, so the body of an
            # error page (404, 500, ...) is scraped like any other page.
            soup = BeautifulSoup(response.text, 'html.parser')
            text = ' '.join(p.text for p in soup.find_all('p'))
            rows.append({'url': url, 'content': text})

        if not rows:
            return

        scraped = pd.DataFrame(rows, columns=['url', 'content'])
        if self.data.empty:
            # Nothing to merge with; this also avoids concatenating an empty
            # frame.
            self.data = scraped
        else:
            self.data = pd.concat([self.data, scraped], ignore_index=True)

    def get_data(self) -> pd.DataFrame:
        """Return the DataFrame holding everything scraped so far."""
        return self.data