Update app.py
app.py
CHANGED
@@ -1,5 +1,218 @@
import streamlit as st
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from PIL import Image
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time
import io
from urllib.parse import urljoin, urlparse
import pandas as pd
import plotly.graph_objects as go

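# Streamlit reruns the whole script on every widget interaction, so results
# (load times, screenshots, crawl output) are kept in st.session_state to
# survive those reruns.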
def initialize_session_state():
    if 'visited_urls' not in st.session_state:
        st.session_state.visited_urls = []
    if 'load_times' not in st.session_state:
        st.session_state.load_times = []
    if 'screenshots' not in st.session_state:
        st.session_state.screenshots = []
    if 'crawl_results' not in st.session_state:
        st.session_state.crawl_results = []

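# Headless Chrome setup. --no-sandbox and --disable-dev-shm-usage are the usual
# flags for running Chrome inside containers, where the sandbox and /dev/shm
# are restricted.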
def setup_chrome_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(options=chrome_options)

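# Selenium returns the screenshot as raw PNG bytes; wrapping them in a PIL
# Image gives st.image something it can render directly.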
def capture_screenshot(driver):
    screenshot = driver.get_screenshot_as_png()
    return Image.open(io.BytesIO(screenshot))

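# Two complementary measures: SequenceMatcher scores raw character overlap,
# while TF-IDF vectors compared with cosine similarity score word-distribution
# overlap, a rough proxy for semantic similarity.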
def calculate_similarity(text1, text2):
    # Character-level similarity
    basic_ratio = SequenceMatcher(None, text1, text2).ratio()

    # Word-level (TF-IDF) similarity; fit_transform raises ValueError on
    # empty or stop-word-only input, in which case we fall back to 0.
    vectorizer = TfidfVectorizer()
    try:
        tfidf = vectorizer.fit_transform([text1, text2])
        semantic_ratio = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]
    except ValueError:
        semantic_ratio = 0

    return basic_ratio, semantic_ratio

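# Same-domain crawler. Pages come out of an unordered set, so visit order is
# arbitrary rather than breadth-first; max_pages bounds the crawl and
# search_term (if given) filters which pages get recorded.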
def crawl_website(url, max_pages=10, search_term=None):
    visited = set()
    to_visit = {url}
    results = []

    while to_visit and len(visited) < max_pages:
        current_url = to_visit.pop()
        if current_url in visited:
            continue
        # Mark as visited up front so a page that errors out is not re-queued
        # and retried indefinitely via links from other pages.
        visited.add(current_url)

        try:
            # Timeout added so one unresponsive page cannot hang the crawl
            response = requests.get(current_url, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract text content
            text_content = soup.get_text()

            # If a search term was provided, record only matching pages
            match_found = search_term.lower() in text_content.lower() if search_term else True

            if match_found:
                results.append({
                    'url': current_url,
                    'title': soup.title.string if soup.title else 'No title',
                    'content_preview': text_content[:200],
                    'matches_search': match_found
                })

            # Queue links that stay on the starting domain
            for link in soup.find_all('a'):
                href = link.get('href')
                if href:
                    absolute_url = urljoin(current_url, href)
                    if urlparse(absolute_url).netloc == urlparse(url).netloc:
                        to_visit.add(absolute_url)

        except Exception as e:
            st.error(f"Error crawling {current_url}: {str(e)}")

    return results

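# One sidebar selector fans out to the three tools; each branch reads its
# widgets and only does work once its button is pressed.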
def main():
    st.title("Web Testing and Crawling Suite")
    initialize_session_state()

    # Sidebar for tool selection
    tool = st.sidebar.radio(
        "Select Tool",
        ["WebTest", "Crawler", "AI Content Comparison"]
    )

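    # WebTest: load the URL repeatedly, timing each load and capturing a
    # screenshot, then chart the accumulated load times.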
if tool == "WebTest":
|
108 |
+
st.header("WebTest - Web Performance Testing")
|
109 |
+
url = st.text_input("Enter URL to test")
|
110 |
+
interval = st.slider("Time interval between requests (seconds)", 1, 30, 5)
|
111 |
+
max_cycles = st.number_input("Number of test cycles", 1, 100, 1)
|
112 |
+
|
113 |
+
if st.button("Start Testing"):
|
114 |
+
driver = setup_chrome_driver()
|
115 |
+
|
116 |
+
for cycle in range(max_cycles):
|
117 |
+
start_time = time.time()
|
118 |
+
|
119 |
+
try:
|
120 |
+
driver.get(url)
|
121 |
+
load_time = time.time() - start_time
|
122 |
+
st.session_state.load_times.append(load_time)
|
123 |
+
|
124 |
+
# Capture screenshot
|
125 |
+
screenshot = capture_screenshot(driver)
|
126 |
+
st.session_state.screenshots.append(screenshot)
|
127 |
+
|
128 |
+
# Show results
|
129 |
+
st.success(f"Cycle {cycle + 1} completed - Load time: {load_time:.2f}s")
|
130 |
+
st.image(screenshot, caption=f"Screenshot - Cycle {cycle + 1}")
|
131 |
+
|
132 |
+
# Plot load times
|
133 |
+
fig = go.Figure(data=go.Scatter(
|
134 |
+
x=list(range(1, len(st.session_state.load_times) + 1)),
|
135 |
+
y=st.session_state.load_times,
|
136 |
+
mode='lines+markers'
|
137 |
+
))
|
138 |
+
fig.update_layout(title="Page Load Times",
|
139 |
+
xaxis_title="Cycle",
|
140 |
+
yaxis_title="Load Time (s)")
|
141 |
+
st.plotly_chart(fig)
|
142 |
+
|
143 |
+
time.sleep(interval)
|
144 |
+
|
145 |
+
except Exception as e:
|
146 |
+
st.error(f"Error in cycle {cycle + 1}: {str(e)}")
|
147 |
+
|
148 |
+
driver.quit()
|
149 |
+
|
150 |
+
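    # Crawler: crawl a site, tabulate the hits, and offer a CSV export.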
elif tool == "Crawler":
|
151 |
+
st.header("Web Crawler")
|
152 |
+
base_url = st.text_input("Enter base URL to crawl")
|
153 |
+
max_pages = st.number_input("Maximum pages to crawl", 1, 100, 10)
|
154 |
+
search_term = st.text_input("Search term (optional)")
|
155 |
+
|
156 |
+
if st.button("Start Crawling"):
|
157 |
+
results = crawl_website(base_url, max_pages, search_term)
|
158 |
+
st.session_state.crawl_results = results
|
159 |
+
|
160 |
+
# Display results
|
161 |
+
df = pd.DataFrame(results)
|
162 |
+
st.dataframe(df)
|
163 |
+
|
164 |
+
# Export options
|
165 |
+
if st.button("Export Results"):
|
166 |
+
csv = df.to_csv(index=False)
|
167 |
+
b64 = base64.b64encode(csv.encode()).decode()
|
168 |
+
href = f'<a href="data:file/csv;base64,{b64}" download="crawl_results.csv">Download CSV</a>'
|
169 |
+
st.markdown(href, unsafe_allow_html=True)
|
170 |
+
|
171 |
+
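    # AI Content Comparison: pull the rendered text of two pages via Selenium
    # and score how similar they are.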
    else:  # AI Content Comparison
        st.header("AI Content Comparison")
        url1 = st.text_input("Enter first URL (AI-generated content)")
        url2 = st.text_input("Enter second URL (Comparison content)")

        if st.button("Compare Content"):
            driver = setup_chrome_driver()

            try:
                # Get content from first URL
                driver.get(url1)
                content1 = driver.find_element(By.TAG_NAME, "body").text

                # Get content from second URL
                driver.get(url2)
                content2 = driver.find_element(By.TAG_NAME, "body").text

                # Calculate similarities
                basic_ratio, semantic_ratio = calculate_similarity(content1, content2)

                # Display results
                st.subheader("Similarity Results")
                col1, col2 = st.columns(2)

                with col1:
                    st.metric("Basic Similarity", f"{basic_ratio:.2%}")

                with col2:
                    st.metric("Semantic Similarity", f"{semantic_ratio:.2%}")

                # Show content previews
                st.subheader("Content Previews")
                st.text_area("Content 1 (First 500 chars)", content1[:500])
                st.text_area("Content 2 (First 500 chars)", content2[:500])

            except Exception as e:
                st.error(f"Error comparing content: {str(e)}")

            finally:
                driver.quit()

if __name__ == "__main__":
    main()

AppGoals="""
Computer Use
1. Browser based testing app
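
A minimal sketch of the third-party dependencies this file imports, using what
should be their PyPI package names (unpinned; verify against your environment):

streamlit
requests
beautifulsoup4
selenium
Pillow
scikit-learn
pandas
plotly

With those installed and Chrome available, the app starts with
"streamlit run app.py"; Selenium 4.6+ resolves a matching chromedriver
automatically, while older versions need one on PATH.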