webscrapper / app.py
thunder-007's picture
initial
96dd5a5
import gradio as gr
import requests
from bs4 import BeautifulSoup
import os
import tempfile
import re
# Heading displayed at the top of the Gradio page.
title = "Web Scraper"

# Text passed to the interface as its description; currently a single newline.
description = "\n"
def get_from_url(url, timeout=30.0):
    """Download a web page and save its text content to a temporary file.

    The page is fetched with ``requests``, parsed with BeautifulSoup's
    ``html.parser`` and flattened to text with one newline between text
    nodes. The text is written as UTF-8 to a temporary ``.txt`` file that is
    NOT deleted automatically (``delete=False``), so the caller can hand the
    path to something that reads it later.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The path of the temporary file holding the extracted text, or
        ``None`` if the URL is empty or anything goes wrong while fetching,
        parsing or writing. Errors are printed, never raised.
    """
    url = (url or "").strip()
    if not url:
        # Blank lines in the input are not URLs; skip them without a request.
        return None

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        text = soup.get_text(separator="\n")
        # delete=False keeps the file on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as file:
            filename = file.name
            file.write(text)
        return filename
    except requests.exceptions.RequestException as e:
        print("Error fetching the URL:", e)
        return None
    except Exception as e:
        # Best-effort boundary: one bad page must not crash the whole batch.
        print("Error:", e)
        return None
def extract_text_from_html(urls):
    """Scrape every URL in a newline-separated string.

    Each non-blank line is stripped of surrounding whitespace (including a
    trailing carriage return from pasted Windows text) and passed to
    ``get_from_url``. URLs that could not be scraped are left out of the
    result so the output contains only real file paths.

    Args:
        urls: One URL per line. ``None`` or an empty string yields no files.

    Returns:
        A list with the temporary-file path for each successfully scraped
        URL, in input order. Empty if nothing could be scraped.
    """
    paths = []
    for line in (urls or "").splitlines():
        candidate = line.strip()
        if not candidate:
            continue
        path = get_from_url(candidate)
        # get_from_url signals failure with None; do not pass that on as a file.
        if path is not None:
            paths.append(path)
    return paths
# Wire the scraper into a Gradio UI: one textbox in, downloadable file(s) out.
interface = gr.Interface(
    fn=extract_text_from_html,
    inputs=[gr.Textbox(label="Url input")],
    outputs=[gr.File(label="Scrapped Text")],
    title=title,
    description=description,
)

# Start the web server as soon as the script runs, with debug output enabled.
interface.launch(debug=True)