sylvain471 committed on
Commit
1b7550e
·
verified ·
1 Parent(s): f981d74

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +5 -5
  2. app.py +59 -0
  3. requirements.txt +3 -0
  4. scrape_fake_app.py +85 -0
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
2
  title: Fake App Scraper
3
- emoji: 🐠
4
- colorFrom: gray
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.33.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- short_description: test
 
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Fake App Scraper
3
+ emoji: 📈
4
+ colorFrom: green
5
+ colorTo: pink
6
  sdk: gradio
7
  sdk_version: 5.33.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ short_description: uses playwright to scrape a fake app hosted on vercel
12
+ tag: "mcp-server-track"
13
  ---
14
 
 
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from scrape_fake_app import get_homework,get_timetable
3
+
4
def fetch_homework(date: str = "today") -> str:
    """Fetch the homework listed on the fake app dashboard.

    Args:
        date: Any string, default "today". Currently ignored: the scraper
            is called without arguments.

    Returns:
        The string describing the homework.
    """
    # `date` is accepted but not forwarded; get_homework() takes no arguments.
    return get_homework()
14
+
15
+
16
def fetch_timetable(date: str = "today") -> str:
    """Fetch the timetable listed on the fake app dashboard.

    Args:
        date: Any string, default "today". Currently ignored: the scraper
            is called without arguments.

    Returns:
        The string describing the timetable.
    """
    # `date` is accepted but not forwarded; get_timetable() takes no arguments.
    return get_timetable()
26
+
27
+
28
+
29
# Dashboard layout: a title row, a hidden "date" textbox that feeds both
# handlers, and two side-by-side columns (button + result box) for homework
# and timetable.
with gr.Blocks() as demo:

    # Title and short description.
    with gr.Row():
        gr.Markdown(
            "## Fake App Dashboard\n"
            "Use playwright to scrape homework and timetable from "
            "[fake-app-omega.vercel.app](https://fake-app-omega.vercel.app)\n"
        )

    # Hidden input: passed to both handlers, which currently ignore its value.
    with gr.Row():
        date = gr.Textbox(label="date", visible=False)

    with gr.Row():
        with gr.Column():
            homeworks_btn = gr.Button("Homeworks")
            homeworks_output = gr.Textbox(label="Homeworks Result", lines=10)

        with gr.Column():
            timetable_btn = gr.Button("Timetable")
            timetable_output = gr.Textbox(label="Timetable Result", lines=10)

    # Each button runs its scraper and writes the text into its own result box.
    homeworks_btn.click(
        fn=fetch_homework,
        inputs=[date],
        outputs=homeworks_output,
    )

    timetable_btn.click(
        fn=fetch_timetable,
        inputs=[date],
        outputs=timetable_output,
    )

# NOTE(review): mcp_server=True presumably also exposes the handlers as MCP
# tools (requirements pin gradio[mcp]) -- confirm against the Gradio docs.
demo.launch(mcp_server=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio[mcp]
2
+ python-dotenv
3
+ playwright
scrape_fake_app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from playwright.sync_api import sync_playwright
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
def load_credentials() -> tuple:
    """Read the fake app URL and login credentials from the environment.

    Calls ``load_dotenv()`` first, then reads the variables with
    ``os.getenv``.

    Returns:
        A ``(url, username, password)`` tuple taken from ``FAKE_APP_URL``,
        ``FAKE_APP_USERNAME`` and ``FAKE_APP_PASSWORD``. Any element is
        ``None`` when its variable is not set.
    """
    load_dotenv()
    url = os.getenv("FAKE_APP_URL")
    username = os.getenv("FAKE_APP_USERNAME")
    password = os.getenv("FAKE_APP_PASSWORD")
    # A tuple (not a dict): callers unpack it positionally.
    return (url, username, password)
11
+
12
def extract_homework_text(page) -> str:
    """Render the "homework" card of the page as plain text.

    Args:
        page: Object exposing ``get_by_title`` / ``locator`` (presumably a
            Playwright ``Page`` already showing the dashboard -- confirm
            against the callers).

    Returns:
        A "Homework:" header followed, for each section of the card, by its
        ``h3`` heading, one space-prefixed line per list item, and a blank
        separator line. The joined text is stripped of surrounding
        whitespace.
    """
    sections = (
        page.get_by_title("homework")
        .locator("[data-slot='card-content'] section > div")
        .all()
    )

    lines = ["Homework:\n"]
    for block in sections:
        lines.append(block.locator("h3").inner_text())
        # One entry per <li>, using the item's full inner text.
        lines.extend(
            f" {entry.inner_text().strip()}"
            for entry in block.locator("ul > li").all()
        )
        lines.append("")  # Blank line between sections.

    return "\n".join(lines).strip()
28
+
29
def extract_timetable_text(page) -> str:
    """Render the "timetable" card of the page as plain text.

    Args:
        page: Object exposing ``get_by_title`` / ``locator`` (presumably a
            Playwright ``Page`` already showing the dashboard -- confirm
            against the callers).

    Returns:
        A "Timetable:" header followed by one line per list item. Items
        containing ``span`` elements are rendered as the stripped span texts
        joined by single spaces; items without spans (e.g. "Lunch break")
        use the item's own stripped inner text. The joined text is stripped
        of surrounding whitespace.
    """
    entries = (
        page.get_by_title("timetable")
        .locator("[data-slot='card-content'] ul > li")
        .all()
    )

    lines = ["Timetable:\n"]
    for entry in entries:
        # Resolve the spans once; an empty list means a plain-text item.
        spans = entry.locator("span").all()
        if spans:
            lines.append(" ".join(span.inner_text().strip() for span in spans))
        else:
            lines.append(entry.inner_text().strip())

    return "\n".join(lines).strip()
44
+
45
+
46
+ # print(URL,USERNAME,PASSWORD)
47
def _scrape_dashboard(extract) -> str:
    """Log in to the fake app and return ``extract(page)`` for the dashboard.

    Args:
        extract: Callable taking the logged-in page and returning a string.

    Returns:
        Whatever ``extract`` returns for the dashboard page.

    Raises:
        RuntimeError: If any of the environment variables read by
            ``load_credentials`` is not set.
    """
    url, username, password = load_credentials()
    names = ("FAKE_APP_URL", "FAKE_APP_USERNAME", "FAKE_APP_PASSWORD")
    missing = [
        name
        for name, value in zip(names, (url, username, password))
        if value is None
    ]
    if missing:
        # Fail early with a clear message instead of deep inside Playwright.
        raise RuntimeError(
            f"Missing environment variable(s): {', '.join(missing)}"
        )

    with sync_playwright() as playwright:
        browser = playwright.firefox.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url, wait_until="domcontentloaded")

            page.get_by_role("textbox", name="username").fill(username)
            page.get_by_role("textbox", name="password").fill(password)
            page.get_by_role("button", name="login").click()
            # Scrape only after the post-login redirect has happened.
            page.wait_for_url("**/dashboard")
            return extract(page)
        finally:
            # Close the browser even when login or extraction raises.
            browser.close()


def get_homework() -> str:
    """Log in to the fake app and return the homework card as text."""
    return _scrape_dashboard(extract_homework_text)


def get_timetable() -> str:
    """Log in to the fake app and return the timetable card as text."""
    return _scrape_dashboard(extract_timetable_text)
81
+
82
if __name__ == "__main__":
    # Manual smoke test: print the homework, a blank line, then the timetable.
    for index, scrape in enumerate((get_homework, get_timetable)):
        if index:
            print()
        print(scrape())