Spaces:
Sleeping
Sleeping
File size: 5,132 Bytes
80fb263 0eaed2f 80fb263 f7d9daf 80fb263 f7d9daf 80fb263 0eaed2f f7d9daf 0eaed2f f7d9daf 0eaed2f f7d9daf 0eaed2f f7d9daf 0eaed2f a682e5d 80fb263 a682e5d 80fb263 f7d9daf 48bc3a2 f7d9daf 48bc3a2 f7d9daf 48bc3a2 66c6476 f7d9daf 66c6476 f7d9daf 0eaed2f a682e5d 48bc3a2 a682e5d 48bc3a2 f7d9daf 3c69552 f7d9daf 3c69552 f7d9daf 3c69552 f7d9daf 3c69552 f7d9daf 66c6476 80fb263 f7d9daf 66c6476 f7d9daf 48bc3a2 f7d9daf 48bc3a2 f7d9daf 48bc3a2 66c6476 f7d9daf 48bc3a2 f7d9daf 0eaed2f a682e5d 0eaed2f f7d9daf 0eaed2f f7d9daf 0eaed2f f7d9daf a682e5d 80fb263 f7d9daf 48bc3a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import gradio as gr
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
def is_valid_url(url):
    """Return True if *url* parses with both a scheme and a network location.

    Malformed input (for example an unterminated IPv6 bracket) and
    non-string values that ``urlparse`` cannot handle yield False instead
    of raising.
    """
    try:
        parts = urlparse(url)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed URL; TypeError/AttributeError: non-string input.
        # Deliberately not a bare ``except`` so KeyboardInterrupt/SystemExit
        # are never swallowed.
        return False
    # Both a scheme ("https") and a domain ("example.com") are required.
    return bool(parts.scheme and parts.netloc)
def extract_additional_resources(url):
    """Extract absolute links to CSS, JS, and images referenced by a page.

    The page at *url* is downloaded and parsed with BeautifulSoup. Relative
    links are resolved against *url*.

    Returns:
        A tuple ``(css_links, js_links, img_links)`` of lists of URLs.
        If the page cannot be fetched, three empty lists are returned
        (this helper is best-effort and never raises on network errors).
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return [], [], []

    soup = BeautifulSoup(response.text, "html.parser")

    def resolve(tags, attribute):
        """Collect absolute URLs from *attribute* of each tag, skipping unusable values."""
        links = []
        for tag in tags:
            value = tag.get(attribute)
            if not value:
                # Missing or empty attribute: joining "" would just yield the
                # page URL itself, which is not a real resource link.
                continue
            try:
                links.append(urljoin(url, value))
            except ValueError:
                # A single malformed link should not discard all the others.
                continue
        return links

    css_links = resolve(soup.find_all("link", rel="stylesheet"), "href")
    js_links = resolve(soup.find_all("script"), "src")
    img_links = resolve(soup.find_all("img"), "src")
    return css_links, js_links, img_links
def fetch_file_content(url):
    """Fetch the text content of a file (CSS, JS, etc.) from *url*.

    Returns:
        The response body as text, or the string ``"Failed to fetch content."``
        if the request fails or returns an HTTP error status.
    """
    try:
        # A timeout keeps one slow asset from stalling the whole request.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Only network/HTTP failures are expected here; anything else is a
        # genuine bug and should surface instead of being swallowed.
        return "Failed to fetch content."
    return response.text
def convert_to_text(url):
    """Download *url* and gather its text plus linked CSS/JS/image resources.

    Returns:
        An 8-tuple, in the order the Gradio outputs expect:
        ``(results, text, file_path, css_links, js_links, img_links,
        css_content, js_content)``. ``results`` is a short status summary;
        ``file_path`` is the path of a .txt file holding the page text.
        If the URL is invalid, the request fails, or the file cannot be
        written, ``results`` is an error message, ``file_path`` is None and
        every other element is empty.
    """
    import tempfile  # local import: only this function needs it

    if not is_valid_url(url):
        return "Error: Please enter a valid URL.", "", None, [], [], [], [], []  # Error message and empty data

    # NOTE(review): url is user-supplied and fetched from the server side;
    # consider restricting internal/private addresses if this is exposed publicly.
    try:
        # Set headers to mimic a browser request
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # A timeout keeps an unresponsive server from hanging the handler.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Check for HTTP errors (e.g., 404, 500)

        status = f"Request status: {response.status_code}"
        content_length = f"Content size: {len(response.text)} characters"
        results = f"{status}\n{content_length}"

        # Save the text to a uniquely named temp file. A single fixed filename
        # would let concurrent requests overwrite each other's download.
        # delete=False because the file must outlive this function so it can
        # be served; cleanup is left to the temp directory's owner.
        with tempfile.NamedTemporaryFile(
            "w",
            encoding="utf-8",
            prefix="downloaded_content_",
            suffix=".txt",
            delete=False,
        ) as file:
            file.write(response.text)
            file_path = file.name

        # Extract additional resources
        css_links, js_links, img_links = extract_additional_resources(url)

        # Fetch CSS and JS content
        css_content = [fetch_file_content(link) for link in css_links]
        js_content = [fetch_file_content(link) for link in js_links]

        return results, response.text, file_path, css_links, js_links, img_links, css_content, js_content
    except (requests.exceptions.RequestException, OSError) as e:
        # Network/HTTP failures and failures writing the temp file are both
        # reported to the user instead of crashing the handler.
        return f"Error: {e}", "", None, [], [], [], [], []  # Error message and empty data
# HTML for the "Copy Code" button.
# The copy logic lives directly in the button's inline onclick handler rather
# than in a separate <script> element: script tags do not execute when markup
# is injected via innerHTML, which would leave a script-defined copyCode()
# undefined when the button is clicked.
# NOTE(review): assumes gr.HTML injects this markup without running <script>
# tags — confirm against the Gradio version in use. The inline handler works
# either way.
# The handler reads the textarea inside the element with id "output-text".
copy_button_html = """
<button onclick="
    const box = document.querySelector('#output-text textarea');
    if (!box) { alert('Failed to copy text.'); return; }
    navigator.clipboard.writeText(box.value)
        .then(() => alert('Text copied to clipboard!'))
        .catch(() => alert('Failed to copy text.'));
">Copy Code</button>
"""
# Link to the CSS file
css = "app.css"

# Create the Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("## URL to Text Converter")
    gr.Markdown("Enter a URL to fetch its text content and download it as a .txt file.")

    with gr.Row():
        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")

    with gr.Row():
        results_output = gr.Textbox(label="Request Results", interactive=False)
        # elem_id is what the "Copy Code" button's JavaScript selects on.
        text_output = gr.Textbox(label="Text Content", interactive=True, elem_id="output-text")

    with gr.Row():
        gr.HTML(copy_button_html)  # Add the "Copy Code" button
        file_output = gr.File(label="Download File", visible=False)  # Hidden file download component

    submit_button = gr.Button("Fetch Content")

    # Accordion to show/hide additional resources. It is declared before the
    # click handler so that its textboxes can be used as the handler's outputs.
    with gr.Accordion("Show/Hide Additional Resources", open=False):
        gr.Markdown("### CSS Files")
        css_output = gr.Textbox(label="CSS Files", interactive=False)
        gr.Markdown("### JS Files")
        js_output = gr.Textbox(label="JS Files", interactive=False)
        gr.Markdown("### Images")
        img_output = gr.Textbox(label="Images", interactive=False)
        gr.Markdown("### CSS Content")
        css_content_output = gr.Textbox(label="CSS Content", interactive=True)
        gr.Markdown("### JS Content")
        js_content_output = gr.Textbox(label="JS Content", interactive=True)

    # The outputs must follow the order of the 8-tuple returned by
    # convert_to_text. The accordion components are wired here directly;
    # creating fresh Textboxes inline would leave the accordion empty.
    submit_button.click(
        fn=convert_to_text,
        inputs=url_input,
        outputs=[
            results_output, text_output, file_output,
            css_output, js_output, img_output,
            css_content_output, js_content_output,
        ],
    )

# Launch the interface
demo.launch()