awacke1 committed
Commit f19e94e · verified · 1 Parent(s): 579471f

Update app.py

Files changed (1)
1. app.py +213 -0
app.py CHANGED
@@ -1,5 +1,218 @@
  import streamlit as st
+ import requests
+ from bs4 import BeautifulSoup
+ from selenium import webdriver
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.common.by import By
+ from PIL import Image
+ import imagehash
+ from difflib import SequenceMatcher
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import time
+ import io
+ import base64
+ from urllib.parse import urljoin, urlparse
+ import pandas as pd
+ import plotly.graph_objects as go
+ import numpy as np
+
+ def initialize_session_state():
+     if 'visited_urls' not in st.session_state:
+         st.session_state.visited_urls = []
+     if 'load_times' not in st.session_state:
+         st.session_state.load_times = []
+     if 'screenshots' not in st.session_state:
+         st.session_state.screenshots = []
+     if 'crawl_results' not in st.session_state:
+         st.session_state.crawl_results = []
+
+ def setup_chrome_driver():
+     chrome_options = Options()
+     chrome_options.add_argument("--headless")
+     chrome_options.add_argument("--no-sandbox")
+     chrome_options.add_argument("--disable-dev-shm-usage")
+     return webdriver.Chrome(options=chrome_options)
+
+ def capture_screenshot(driver):
+     screenshot = driver.get_screenshot_as_png()
+     return Image.open(io.BytesIO(screenshot))
+
+ def calculate_similarity(text1, text2):
+     # Basic similarity: character-level match ratio
+     basic_ratio = SequenceMatcher(None, text1, text2).ratio()
+
+     # Semantic similarity: cosine similarity of TF-IDF vectors
+     vectorizer = TfidfVectorizer()
+     try:
+         tfidf = vectorizer.fit_transform([text1, text2])
+         semantic_ratio = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]
+     except Exception:
+         semantic_ratio = 0
+
+     return basic_ratio, semantic_ratio
+
+ def crawl_website(url, max_pages=10, search_term=None):
+     visited = set()
+     to_visit = {url}
+     results = []
+
+     while to_visit and len(visited) < max_pages:
+         current_url = to_visit.pop()
+         if current_url in visited:
+             continue
+
+         try:
+             response = requests.get(current_url, timeout=10)
+             soup = BeautifulSoup(response.text, 'html.parser')
+             visited.add(current_url)
+
+             # Extract text content
+             text_content = soup.get_text()
+
+             # If search term provided, check for matches
+             match_found = search_term.lower() in text_content.lower() if search_term else True
+
+             if match_found:
+                 results.append({
+                     'url': current_url,
+                     'title': soup.title.string if soup.title else 'No title',
+                     'content_preview': text_content[:200],
+                     'matches_search': match_found
+                 })
+
+             # Find new links, keeping only those on the same domain
+             for link in soup.find_all('a'):
+                 href = link.get('href')
+                 if href:
+                     absolute_url = urljoin(current_url, href)
+                     if urlparse(absolute_url).netloc == urlparse(url).netloc:
+                         to_visit.add(absolute_url)
+
+         except Exception as e:
+             st.error(f"Error crawling {current_url}: {str(e)}")
+
+     return results
+
+ def main():
+     st.title("Web Testing and Crawling Suite")
+     initialize_session_state()
+
+     # Sidebar for tool selection
+     tool = st.sidebar.radio(
+         "Select Tool",
+         ["WebTest", "Crawler", "AI Content Comparison"]
+     )
+
+     if tool == "WebTest":
+         st.header("WebTest - Web Performance Testing")
+         url = st.text_input("Enter URL to test")
+         interval = st.slider("Time interval between requests (seconds)", 1, 30, 5)
+         max_cycles = st.number_input("Number of test cycles", 1, 100, 1)
+
+         if st.button("Start Testing"):
+             driver = setup_chrome_driver()
+
+             for cycle in range(max_cycles):
+                 start_time = time.time()
+
+                 try:
+                     driver.get(url)
+                     load_time = time.time() - start_time
+                     st.session_state.load_times.append(load_time)
+
+                     # Capture screenshot
+                     screenshot = capture_screenshot(driver)
+                     st.session_state.screenshots.append(screenshot)
+
+                     # Show results
+                     st.success(f"Cycle {cycle + 1} completed - Load time: {load_time:.2f}s")
+                     st.image(screenshot, caption=f"Screenshot - Cycle {cycle + 1}")
+
+                     # Plot load times
+                     fig = go.Figure(data=go.Scatter(
+                         x=list(range(1, len(st.session_state.load_times) + 1)),
+                         y=st.session_state.load_times,
+                         mode='lines+markers'
+                     ))
+                     fig.update_layout(title="Page Load Times",
+                                       xaxis_title="Cycle",
+                                       yaxis_title="Load Time (s)")
+                     st.plotly_chart(fig)
+
+                     time.sleep(interval)
+
+                 except Exception as e:
+                     st.error(f"Error in cycle {cycle + 1}: {str(e)}")
+
+             driver.quit()
+
+     elif tool == "Crawler":
+         st.header("Web Crawler")
+         base_url = st.text_input("Enter base URL to crawl")
+         max_pages = st.number_input("Maximum pages to crawl", 1, 100, 10)
+         search_term = st.text_input("Search term (optional)")
+
+         if st.button("Start Crawling"):
+             results = crawl_website(base_url, max_pages, search_term)
+             st.session_state.crawl_results = results
+
+             # Display results
+             df = pd.DataFrame(results)
+             st.dataframe(df)
+
+             # Export results; st.download_button embeds the CSV at render
+             # time, avoiding the nested st.button whose click would rerun
+             # the script and reset the "Start Crawling" state
+             st.download_button(
+                 "Export Results",
+                 df.to_csv(index=False),
+                 file_name="crawl_results.csv",
+                 mime="text/csv"
+             )
+
+     else:  # AI Content Comparison
+         st.header("AI Content Comparison")
+         url1 = st.text_input("Enter first URL (AI-generated content)")
+         url2 = st.text_input("Enter second URL (Comparison content)")
+
+         if st.button("Compare Content"):
+             driver = setup_chrome_driver()
+
+             try:
+                 # Get content from first URL
+                 driver.get(url1)
+                 content1 = driver.find_element(By.TAG_NAME, "body").text
+
+                 # Get content from second URL
+                 driver.get(url2)
+                 content2 = driver.find_element(By.TAG_NAME, "body").text
+
+                 # Calculate similarities
+                 basic_ratio, semantic_ratio = calculate_similarity(content1, content2)
+
+                 # Display results
+                 st.subheader("Similarity Results")
+                 col1, col2 = st.columns(2)
+
+                 with col1:
+                     st.metric("Basic Similarity", f"{basic_ratio:.2%}")
+
+                 with col2:
+                     st.metric("Semantic Similarity", f"{semantic_ratio:.2%}")
+
+                 # Show content previews
+                 st.subheader("Content Previews")
+                 st.text_area("Content 1 (First 500 chars)", content1[:500])
+                 st.text_area("Content 2 (First 500 chars)", content2[:500])
+
+             except Exception as e:
+                 st.error(f"Error comparing content: {str(e)}")
+
+             finally:
+                 driver.quit()
+
+ if __name__ == "__main__":
+     main()
  
+
  AppGoals="""
  Computer Use
  1. Browser based testing app
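
For anyone trying this commit locally, the imports above imply roughly the dependencies below. This is a sketch rather than a pinned list from the repo: the pip names for bs4, PIL, imagehash, and sklearn are assumed to be beautifulsoup4, Pillow, ImageHash, and scikit-learn, and the Selenium paths additionally need a Chrome/Chromium binary with a matching chromedriver on the PATH.

# requirements.txt (sketch; package names inferred from the imports)
streamlit
requests
beautifulsoup4
selenium
Pillow
ImageHash
scikit-learn
pandas
plotly
numpy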
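
For readers skimming the diff, the scoring in calculate_similarity() reduces to two library calls: a character-level match ratio from difflib and a TF-IDF cosine similarity, which measures word-frequency overlap rather than deep semantics. A minimal standalone sketch, runnable outside Streamlit; the sample strings are invented:

from difflib import SequenceMatcher
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

a = "The quick brown fox jumps over the lazy dog."
b = "A quick brown fox leaped over a sleepy dog."

basic = SequenceMatcher(None, a, b).ratio()        # character-level overlap in [0, 1]
tfidf = TfidfVectorizer().fit_transform([a, b])    # 2 x vocabulary sparse matrix
semantic = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]  # cosine of the two TF-IDF rows

print(f"basic={basic:.2%} semantic={semantic:.2%}")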
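
Similarly, crawl_website() stays on one site by resolving each href against the current page with urljoin and comparing hosts via urlparse().netloc. A quick illustration with made-up URLs:

from urllib.parse import urljoin, urlparse

base = "https://example.com/docs/"
for href in ["intro.html", "/about", "https://other.org/page"]:
    absolute = urljoin(base, href)  # resolve relative links against the page URL
    same_site = urlparse(absolute).netloc == urlparse(base).netloc
    print(absolute, same_site)
# https://example.com/docs/intro.html True
# https://example.com/about True
# https://other.org/page False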