# Spaces: Sleeping
import gradio as gr | |
import subprocess | |
import os | |
def run_katana(url):
    """Crawl ``url`` with the ``katana`` CLI and return the path of the results file.

    A minimal config file is written to ``/tmp/katana/config.yaml`` and katana
    is asked to write its results to ``/tmp/urls.txt``.

    Args:
        url: Target passed to katana's ``-u`` flag. It is treated as untrusted
            input and is handed to katana as a single argument; it is never
            interpreted by a shell.

    Returns:
        The path of the output file when katana produced one; otherwise a
        human-readable message describing what went wrong.
        NOTE(review): the interface built from this function declares a file
        output, so the message strings may not display as intended -- confirm.
    """
    # Reject blank / non-string input up front; otherwise katana would take
    # the following "-o" flag as its target.
    target = url.strip() if isinstance(url, str) else ""
    if not target:
        return "Error running katana: no URL was provided."

    katana_config_dir = "/tmp/katana"
    os.makedirs(katana_config_dir, exist_ok=True)
    config_file_path = os.path.join(katana_config_dir, "config.yaml")
    with open(config_file_path, "w", encoding="utf-8") as f:
        f.write("headless: true\n")  # Add a valid configuration option

    output_file = "/tmp/urls.txt"
    # Remove results left behind by an earlier run so they can never be
    # returned as if they belonged to this crawl.
    try:
        os.remove(output_file)
    except FileNotFoundError:
        pass

    # Argument list + no shell: metacharacters in the URL (";", "|", "$()",
    # "#", ...) reach katana verbatim instead of being executed.
    command = [
        "katana",
        "-config", config_file_path,
        "-u", target,
        "-o", output_file,
    ]
    try:
        subprocess.run(command, check=True, stderr=subprocess.PIPE, text=True)
    except subprocess.CalledProcessError as e:
        return f"Error running katana: {e.stderr}"
    except OSError as e:
        # Without a shell, a missing or non-executable katana binary surfaces
        # here rather than as a non-zero exit status.
        return f"Error running katana: {e}"

    if os.path.exists(output_file):
        return output_file
    return "Katana completed but no output file was created."
# Flagged samples are stored under /tmp; create the directory before the UI
# is built so it is already present when the interface is configured.
flagged_dir = "/tmp/flagged"
os.makedirs(flagged_dir, exist_ok=True)

# Web UI: a single text box in (the URL), a downloadable file out (the
# value returned by run_katana).
interface = gr.Interface(
    title="Katana URL Crawler",
    description="Enter a URL to crawl with Katana.",
    fn=run_katana,
    inputs="text",
    outputs="file",
    flagging_dir=flagged_dir,
)

# Listen on every network interface, port 7860.
interface.launch(server_port=7860, server_name="0.0.0.0")