File size: 1,161 Bytes
188a2fe
 
 
d692aee
188a2fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import streamlit as st
from scraper import scrape_website, split_dom_content, clean_body_content, extract_body_content
from parse import parse
from Data import markdown_to_csv

# Page header and the URL field that drives the scrape step.
st.title("AI Web Scraper")
url = st.text_input("Enter a Website URL")

# Streamlit re-executes the script on each interaction; this branch runs only
# on the rerun triggered by clicking the button.
if st.button("Scrape Site"):
    target_url = url.strip()
    if not target_url:
        # Without this guard an empty/whitespace-only string would be handed
        # straight to the scraper.
        st.warning("Please enter a website URL before scraping.")
    else:
        st.write("Scraping the website")

        # Pipeline built from the project's scraper helpers:
        # fetch -> extract body -> clean.
        # NOTE(review): presumably scrape_website returns the raw page HTML;
        # confirm against scraper.scrape_website.
        page_source = scrape_website(target_url)
        body_content = extract_body_content(page_source)
        cleaned_content = clean_body_content(body_content)

        # Stored in session state so the content is still available on later
        # reruns, where the parse step reads it back.
        st.session_state.dom_content = cleaned_content

        with st.expander("View DOM Content"):
            st.text_area("DOM Content", cleaned_content, height=300)

# The parse UI is only shown once a scrape has stored content in session state.
if "dom_content" in st.session_state:
    parse_description = st.text_area("Describe what you want to parse?")

    if st.button("Parse Content"):
        if not parse_description.strip():
            # Give feedback instead of silently ignoring the click when no
            # (or only whitespace) description was provided.
            st.warning("Please describe what you want to parse.")
        else:
            st.write("Parsing Content")

            dom_chunks = split_dom_content(st.session_state.dom_content)
            result = parse(dom_chunks, parse_description)
            # Debug aid: dumps the raw parser output to the server console.
            print(repr(result))

            # Apply the conversion and render every item it yields.
            # NOTE(review): presumably one entry per table found in the
            # parser's markdown output; confirm against Data.markdown_to_csv.
            tables = markdown_to_csv(result)
            for table in tables:
                st.write(table)