Spaces:

thunder-007
/

webscrapper

Runtime error

File size: 1,116 Bytes

96dd5a5

import gradio as gr
import requests
from bs4 import BeautifulSoup
import os
import tempfile
import re

# Heading shown at the top of the Gradio page.
title = "Web Scraper"
# Description shown under the heading; currently just a single newline.
description = "\n"


def get_from_url(url, timeout=30):
    """Download a web page and save its text content to a temporary file.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unresponsive host.

    Returns:
        Path of a temporary UTF-8 ``.txt`` file containing the page text, or
        ``None`` when the URL is empty or the page could not be fetched or
        processed. The file is created with ``delete=False`` so it outlives
        this call; removing it is left to the caller.
    """
    url = url.strip() if url else ""
    if not url:
        # Nothing to fetch; avoid a pointless request that can only fail.
        print("Error fetching the URL:", "empty URL")
        return None

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        text = soup.get_text(separator="\n")

        # The .txt suffix gives the downloaded file a sensible extension.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, encoding="utf-8", suffix=".txt"
        ) as file:
            file.write(text)
            filename = file.name

        return filename  # Return the temporary file path
    except requests.exceptions.RequestException as e:
        print("Error fetching the URL:", e)
        return None
    except Exception as e:
        # Best-effort boundary: parsing or writing failed, so report the
        # problem and signal failure instead of crashing the whole batch.
        print("Error:", e)
        return None


def extract_text_from_html(urls):
    """Scrape every URL listed in *urls*, one URL per line.

    Blank lines and surrounding whitespace (including a trailing ``\\r`` from
    Windows-style line endings) are ignored, and URLs that fail to download
    are skipped so the result only contains usable file paths.

    Args:
        urls: Newline-separated URLs, as typed into the text box.

    Returns:
        List of paths to the temporary text files, one per URL that was
        scraped successfully. Empty when no URL was given or all failed.
    """
    if not urls:
        return []

    paths = []
    for line in urls.splitlines():
        candidate = line.strip()
        if not candidate:
            continue
        path = get_from_url(candidate)
        # get_from_url returns None on failure; keep only real files.
        if path is not None:
            paths.append(path)
    return paths


# Wire the scraper into a Gradio UI: one text box in, file output out.
interface = gr.Interface(
    fn=extract_text_from_html,
    inputs=[gr.Textbox(label="Url input")],
    outputs=[gr.File(label="Scrapped Text")],
    title=title,
    description=description,
)

# Start the app with Gradio's debug flag enabled.
interface.launch(debug=True)