Update app.py
app.py
CHANGED
@@ -1,5 +1,218 @@
import streamlit as st
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from PIL import Image
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time
import io
from urllib.parse import urljoin, urlparse
import pandas as pd
import plotly.graph_objects as go

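# Streamlit reruns the whole script on every widget interaction, so results
# (load times, screenshots, crawl output) are kept in st.session_state to
# survive those reruns.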
def initialize_session_state():
    if 'visited_urls' not in st.session_state:
        st.session_state.visited_urls = []
    if 'load_times' not in st.session_state:
        st.session_state.load_times = []
    if 'screenshots' not in st.session_state:
        st.session_state.screenshots = []
    if 'crawl_results' not in st.session_state:
        st.session_state.crawl_results = []

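# Headless Chrome setup. --no-sandbox and --disable-dev-shm-usage are the usual
# flags for running Chrome inside containers, where the sandbox and /dev/shm
# are restricted.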
def setup_chrome_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(options=chrome_options)

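# Selenium returns the screenshot as raw PNG bytes; wrapping them in a PIL
# Image gives st.image something it can render directly.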
def capture_screenshot(driver):
    screenshot = driver.get_screenshot_as_png()
    return Image.open(io.BytesIO(screenshot))

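# Two complementary measures: SequenceMatcher scores raw character overlap,
# while TF-IDF vectors compared with cosine similarity score word-distribution
# overlap, a rough proxy for semantic similarity.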
def calculate_similarity(text1, text2):
    # Character-level similarity
    basic_ratio = SequenceMatcher(None, text1, text2).ratio()

    # Word-level (TF-IDF) similarity; fit_transform raises ValueError on
    # empty or stop-word-only input, in which case we fall back to 0.
    vectorizer = TfidfVectorizer()
    try:
        tfidf = vectorizer.fit_transform([text1, text2])
        semantic_ratio = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]
    except ValueError:
        semantic_ratio = 0

    return basic_ratio, semantic_ratio

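# Same-domain crawler. Pages come out of an unordered set, so visit order is
# arbitrary rather than breadth-first; max_pages bounds the crawl and
# search_term (if given) filters which pages get recorded.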
def crawl_website(url, max_pages=10, search_term=None):
    visited = set()
    to_visit = {url}
    results = []

    while to_visit and len(visited) < max_pages:
        current_url = to_visit.pop()
        if current_url in visited:
            continue
        # Mark as visited up front so a page that errors out is not re-queued
        # and retried indefinitely via links from other pages.
        visited.add(current_url)

        try:
            # Timeout added so one unresponsive page cannot hang the crawl
            response = requests.get(current_url, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract text content
            text_content = soup.get_text()

            # If a search term was provided, record only matching pages
            match_found = search_term.lower() in text_content.lower() if search_term else True

            if match_found:
                results.append({
                    'url': current_url,
                    'title': soup.title.string if soup.title else 'No title',
                    'content_preview': text_content[:200],
                    'matches_search': match_found
                })

            # Queue links that stay on the starting domain
            for link in soup.find_all('a'):
                href = link.get('href')
                if href:
                    absolute_url = urljoin(current_url, href)
                    if urlparse(absolute_url).netloc == urlparse(url).netloc:
                        to_visit.add(absolute_url)

        except Exception as e:
            st.error(f"Error crawling {current_url}: {str(e)}")

    return results

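# One sidebar selector fans out to the three tools; each branch reads its
# widgets and only does work once its button is pressed.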
def main():
    st.title("Web Testing and Crawling Suite")
    initialize_session_state()

    # Sidebar for tool selection
    tool = st.sidebar.radio(
        "Select Tool",
        ["WebTest", "Crawler", "AI Content Comparison"]
    )

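    # WebTest: load the URL repeatedly, timing each load and capturing a
    # screenshot, then chart the accumulated load times.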
if tool == "WebTest":
|
108 |
+
st.header("WebTest - Web Performance Testing")
|
109 |
+
url = st.text_input("Enter URL to test")
|
110 |
+
interval = st.slider("Time interval between requests (seconds)", 1, 30, 5)
|
111 |
+
max_cycles = st.number_input("Number of test cycles", 1, 100, 1)
|
112 |
+
|
113 |
+
if st.button("Start Testing"):
|
114 |
+
driver = setup_chrome_driver()
|
115 |
+
|
116 |
+
for cycle in range(max_cycles):
|
117 |
+
start_time = time.time()
|
118 |
+
|
119 |
+
try:
|
120 |
+
driver.get(url)
|
121 |
+
load_time = time.time() - start_time
|
122 |
+
st.session_state.load_times.append(load_time)
|
123 |
+
|
124 |
+
# Capture screenshot
|
125 |
+
screenshot = capture_screenshot(driver)
|
126 |
+
st.session_state.screenshots.append(screenshot)
|
127 |
+
|
128 |
+
# Show results
|
129 |
+
st.success(f"Cycle {cycle + 1} completed - Load time: {load_time:.2f}s")
|
130 |
+
st.image(screenshot, caption=f"Screenshot - Cycle {cycle + 1}")
|
131 |
+
|
132 |
+
# Plot load times
|
133 |
+
fig = go.Figure(data=go.Scatter(
|
134 |
+
x=list(range(1, len(st.session_state.load_times) + 1)),
|
135 |
+
y=st.session_state.load_times,
|
136 |
+
mode='lines+markers'
|
137 |
+
))
|
138 |
+
fig.update_layout(title="Page Load Times",
|
139 |
+
xaxis_title="Cycle",
|
140 |
+
yaxis_title="Load Time (s)")
|
141 |
+
st.plotly_chart(fig)
|
142 |
+
|
143 |
+
time.sleep(interval)
|
144 |
+
|
145 |
+
except Exception as e:
|
146 |
+
st.error(f"Error in cycle {cycle + 1}: {str(e)}")
|
147 |
+
|
148 |
+
driver.quit()
|
149 |
+
|
150 |
+
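    # Crawler: crawl a site, tabulate the hits, and offer a CSV export.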
elif tool == "Crawler":
|
151 |
+
st.header("Web Crawler")
|
152 |
+
base_url = st.text_input("Enter base URL to crawl")
|
153 |
+
max_pages = st.number_input("Maximum pages to crawl", 1, 100, 10)
|
154 |
+
search_term = st.text_input("Search term (optional)")
|
155 |
+
|
156 |
+
if st.button("Start Crawling"):
|
157 |
+
results = crawl_website(base_url, max_pages, search_term)
|
158 |
+
st.session_state.crawl_results = results
|
159 |
+
|
160 |
+
# Display results
|
161 |
+
df = pd.DataFrame(results)
|
162 |
+
st.dataframe(df)
|
163 |
+
|
164 |
+
# Export options
|
165 |
+
if st.button("Export Results"):
|
166 |
+
csv = df.to_csv(index=False)
|
167 |
+
b64 = base64.b64encode(csv.encode()).decode()
|
168 |
+
href = f'<a href="data:file/csv;base64,{b64}" download="crawl_results.csv">Download CSV</a>'
|
169 |
+
st.markdown(href, unsafe_allow_html=True)
|
170 |
+
|
171 |
+
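    # AI Content Comparison: pull the rendered text of two pages via Selenium
    # and score how similar they are.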
    else:  # AI Content Comparison
        st.header("AI Content Comparison")
        url1 = st.text_input("Enter first URL (AI-generated content)")
        url2 = st.text_input("Enter second URL (Comparison content)")

        if st.button("Compare Content"):
            driver = setup_chrome_driver()

            try:
                # Get content from first URL
                driver.get(url1)
                content1 = driver.find_element(By.TAG_NAME, "body").text

                # Get content from second URL
                driver.get(url2)
                content2 = driver.find_element(By.TAG_NAME, "body").text

                # Calculate similarities
                basic_ratio, semantic_ratio = calculate_similarity(content1, content2)

                # Display results
                st.subheader("Similarity Results")
                col1, col2 = st.columns(2)

                with col1:
                    st.metric("Basic Similarity", f"{basic_ratio:.2%}")

                with col2:
                    st.metric("Semantic Similarity", f"{semantic_ratio:.2%}")

                # Show content previews
                st.subheader("Content Previews")
                st.text_area("Content 1 (First 500 chars)", content1[:500])
                st.text_area("Content 2 (First 500 chars)", content2[:500])

            except Exception as e:
                st.error(f"Error comparing content: {str(e)}")

            finally:
                driver.quit()

if __name__ == "__main__":
    main()

AppGoals="""
Computer Use
1. Browser based testing app
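
A minimal sketch of the third-party dependencies this file imports, using what
should be their PyPI package names (unpinned; verify against your environment):

streamlit
requests
beautifulsoup4
selenium
Pillow
scikit-learn
pandas
plotly

With those installed and Chrome available, the app starts with
"streamlit run app.py"; Selenium 4.6+ resolves a matching chromedriver
automatically, while older versions need one on PATH.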