Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- README.md +5 -5
- app.py +59 -0
- requirements.txt +3 -0
- scrape_fake_app.py +85 -0
README.md
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
---
|
2 |
title: Fake App Scraper
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.33.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
-
short_description:
|
|
|
12 |
---
|
13 |
|
14 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: Fake App Scraper
|
3 |
+
emoji: 📈
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: pink
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.33.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
+
short_description: uses playwright to scrape a fake app hosted on vercel
|
12 |
+
tags:
  - mcp-server-track
|
13 |
---
|
14 |
|
|
app.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from scrape_fake_app import get_homework,get_timetable
|
3 |
+
|
4 |
+
def fetch_homework(date: str = 'today') -> str:
    """
    Fetch the homework listing from the fake app and return it as text.

    Args:
        date: currently ignored, default "today"; kept so the signature stays stable
    Returns:
        The string describing the homeworks
    """
    # `date` is intentionally not forwarded: get_homework() takes no arguments.
    return get_homework()
|
14 |
+
|
15 |
+
|
16 |
+
def fetch_timetable(date: str = 'today') -> str:
    """
    Fetch the timetable from the fake app and return it as text.

    Args:
        date: currently ignored, default "today"; kept so the signature stays stable
    Returns:
        The string describing the timetable
    """
    # `date` is intentionally not forwarded: get_timetable() takes no arguments.
    return get_timetable()
|
26 |
+
|
27 |
+
|
28 |
+
|
29 |
+
# Dashboard layout: a heading, a hidden "date" input, and two columns that
# each pair a trigger button with the textbox showing its result.
with gr.Blocks() as demo:

    # Heading plus a one-line explanation of what the app does.
    with gr.Row():
        gr.Markdown(
            "## Fake App Dashboard\n"
            "Use playwright to scrape homework and timetable from "
            "[fake-app-omega.vercel.app](https://fake-app-omega.vercel.app)\n"
        )

    # Invisible textbox; it only exists to feed the `date` argument of the
    # two handlers below.
    with gr.Row():
        date = gr.Textbox(label="date", visible=False)

    with gr.Row():
        # Left column: homework.
        with gr.Column():
            homeworks_btn = gr.Button("Homeworks")
            homeworks_output = gr.Textbox(label="Homeworks Result", lines=10)

        # Right column: timetable.
        with gr.Column():
            timetable_btn = gr.Button("Timetable")
            timetable_output = gr.Textbox(label="Timetable Result", lines=10)

    # Wire each button to its fetch function (homework first, then timetable).
    homeworks_btn.click(fetch_homework, inputs=[date], outputs=homeworks_output)
    timetable_btn.click(fetch_timetable, inputs=[date], outputs=timetable_output)

# Start the app with the MCP server enabled.
demo.launch(mcp_server=True)
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio[mcp]
|
2 |
+
python-dotenv
|
3 |
+
playwright
|
scrape_fake_app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from playwright.sync_api import sync_playwright
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
def load_credentials() -> tuple[str, str, str]:
    """Read the fake app's URL and login credentials from the environment.

    ``load_dotenv()`` runs first so that variables defined in a ``.env``
    file are picked up as well.

    Returns:
        A ``(url, username, password)`` tuple taken from ``FAKE_APP_URL``,
        ``FAKE_APP_USERNAME`` and ``FAKE_APP_PASSWORD``.

    Raises:
        RuntimeError: if any of the three variables is not set. Failing here
            names the missing variable instead of surfacing later as an
            opaque error when ``None`` is handed to the browser.
    """
    load_dotenv()
    names = ('FAKE_APP_URL', 'FAKE_APP_USERNAME', 'FAKE_APP_PASSWORD')
    values = tuple(os.getenv(name) for name in names)

    missing = [name for name, value in zip(names, values) if value is None]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )
    return values
|
11 |
+
|
12 |
+
def extract_homework_text(page) -> str:
    """Render the homework card of *page* as plain text.

    The card is located by its ``homework`` title. Each
    ``section > div`` inside the card content contributes its ``h3``
    heading, followed by one line per ``ul > li`` entry, followed by a
    blank separator line.

    Args:
        page: object exposing ``get_by_title``; the callers in this file
            pass the Playwright page they opened.

    Returns:
        The text starting with ``Homework:``, with surrounding whitespace
        stripped.
    """
    homework_card = page.get_by_title("homework")
    blocks = homework_card.locator(
        "[data-slot='card-content'] section > div"
    ).all()

    lines = ["Homework:\n"]
    for block in blocks:
        lines.append(block.locator("h3").inner_text())
        # One line per list entry, using the entry's full inner text.
        lines.extend(
            f" {entry.inner_text().strip()}"
            for entry in block.locator("ul > li").all()
        )
        # Blank line between sections; the final strip() drops the last one.
        lines.append("")

    return "\n".join(lines).strip()
|
28 |
+
|
29 |
+
def extract_timetable_text(page) -> str:
    """Render the timetable card of *page* as plain text.

    The card is located by its ``timetable`` title and every ``ul > li``
    inside the card content becomes one output line.

    Args:
        page: object exposing ``get_by_title``; the callers in this file
            pass the Playwright page they opened.

    Returns:
        The text starting with ``Timetable:``, with surrounding whitespace
        stripped.
    """
    card = page.get_by_title("timetable")
    items = card.locator("[data-slot='card-content'] ul > li").all()

    output = ["Timetable:\n"]
    for item in items:
        # Query the spans once; an empty result marks a plain text item
        # such as "Lunch break".
        spans = item.locator("span").all()
        if spans:
            output.append(" ".join(span.inner_text().strip() for span in spans))
        else:
            output.append(item.inner_text().strip())

    return "\n".join(output).strip()
|
44 |
+
|
45 |
+
|
46 |
+
# print(URL,USERNAME,PASSWORD)
|
47 |
+
def _scrape_dashboard(extract) -> str:
    """Log in to the fake app and return ``extract(page)`` for its dashboard.

    Args:
        extract: callable that takes the logged-in page and returns text.

    Returns:
        Whatever *extract* returns.
    """
    url, username, password = load_credentials()

    with sync_playwright() as playwright:
        browser = playwright.firefox.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url, wait_until="domcontentloaded")

            page.get_by_role('textbox', name='username').fill(username)
            page.get_by_role('textbox', name='password').fill(password)
            page.get_by_role('button', name='login').click()
            page.wait_for_url("**/dashboard")
            # NOTE(review): the extractors read the cards right after the URL
            # changes - confirm the dashboard content is rendered by then.
            return extract(page)
        finally:
            # Close the browser even when login or extraction raises.
            browser.close()


def get_homework() -> str:
    """Log in to the fake app and return the homework card as text."""
    return _scrape_dashboard(extract_homework_text)


def get_timetable() -> str:
    """Log in to the fake app and return the timetable card as text."""
    return _scrape_dashboard(extract_timetable_text)
|
81 |
+
|
82 |
+
if __name__ == "__main__":
    # Manual smoke test: print the homework, a blank line, then the timetable.
    homework_report = get_homework()
    print(homework_report)
    print()
    timetable_report = get_timetable()
    print(timetable_report)
|