sylvain471 committed on
Commit
4365c47
·
verified ·
1 Parent(s): 4abf986

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +51 -0
  2. README.md +6 -5
  3. app.py +90 -0
  4. dashboard.png +0 -0
  5. login.png +0 -0
  6. requirements.txt +4 -0
  7. scrape_fake_app.py +93 -0
Dockerfile ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Shared libraries needed by the headless browser that Playwright drives
# (plus `mc` as an in-container file manager for debugging).
RUN apt-get update && apt-get install -y \
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libatspi2.0-0 \
    libxcomposite1 \
    libxdamage1 \
    libxrandr2 \
    libgbm-dev \
    libgtk-3-0 \
    xdg-utils \
    libasound2 \
    mc \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Install the Playwright package system-wide, then (still as root) only the
# OS packages Firefox needs. Browser binaries are deliberately NOT downloaded
# here: Playwright stores them in the invoking user's cache, so anything
# fetched as root would be invisible to the unprivileged runtime user below.
RUN pip install --no-cache-dir playwright
RUN playwright install-deps firefox \
    && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user
USER user

# HOME gets its own instruction: variables referenced inside a single ENV
# instruction expand to the value they had *before* that instruction, so
# PYTHONPATH=$HOME/app would not see /home/user if both were set together.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Download the Firefox build as the runtime user so it lands in that user's
# Playwright cache.
RUN playwright install firefox

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: Fake App Scraper
3
- emoji: 😻
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: docker
 
7
  pinned: false
8
  license: mit
9
- short_description: use playwright to scrape a fake app
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Fake App Scraper
3
+ emoji: 📈
4
+ colorFrom: green
5
+ colorTo: pink
6
  sdk: docker
7
+ app_file: app.py
8
  pinned: false
9
  license: mit
10
+ short_description: uses playwright to scrape a fake app hosted on vercel
11
+ tags: ["mcp-server-track"]
12
  ---
13
 
 
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from scrape_fake_app import get_homework,get_timetable
3
+ import subprocess
4
+ import os
5
+ from dotenv import load_dotenv
6
+
7
# When the ENVIRONMENT variable marks this as the production deployment,
# ask the Playwright CLI to install Firefox before the UI is built.
# The command's exit status is not inspected.
if os.environ.get('ENVIRONMENT') == "PROD":
    install_command = ["playwright", "install", "firefox"]
    print("installing playwright firefox")
    subprocess.run(install_command)
10
+
11
def fetch_homework(date: str = 'today') -> str:
    """
    Fetch the homework shown on the fake app's dashboard.

    Args:
        date: any string, default "today". The value is accepted but is
            not forwarded to the scraper.
    Returns:
        The string describing the homeworks, exactly as produced by
        get_homework().
    """
    homework_text = get_homework()
    return homework_text
21
+
22
+
23
def fetch_timetable(date: str = 'today') -> str:
    """
    Fetch the timetable shown on the fake app's dashboard.

    Args:
        date: any string, default "today". The value is accepted but is
            not forwarded to the scraper.
    Returns:
        The string describing the timetable, exactly as produced by
        get_timetable().
    """
    timetable_text = get_timetable()
    return timetable_text
33
+
34
+
35
# Static HTML fragments rendered by the Gradio page below.

# Heading shown at the top of the page.
title = "<h2>Gradio MCP Hackathon: fake-app-scraper</h2>"

# Explanatory text: what the app does and the open credentials question.
description = """<div style="font-family: sans-serif; line-height: 1.6;">
<p>
This app uses Playwright to log in and scrape the content of the dashboard of the fake app
<a href="https://fake-app-omega.vercel.app" target="_blank">fake-app-omega.vercel.app</a>.
</p>
<p>
The starting point was to provide an LLM-friendly, API-fied version of a real app that does not provide any API. Used as an MCP server, any user of the app could simply ask their AI assistant to fetch information from their dashboard.
</p>
<h3>Problem</h3>
<p>
When run locally, the app can take the credentials as environment variables to log into the user's dashboard. However, when hosted as a Hugging Face Space, I couldn't find a way to send the credentials securely—i.e., without explicitly providing them to the LLM.
</p>
<p>
In conclusion, as it stands, this app together with the fake Next.js app only serves demonstration or educational purposes and does not solve a real-life problem.
</p>
<p><strong>I’d be happy to get any suggestions on how to send credentials in the context of a Gradio HF Space–hosted app.</strong></p>
</div>
"""

# Two screenshots (login page and dashboard) fetched through Gradio's file route.
images = """<img src="gradio_api/file=login.png" alt="login" style="max-width: 35%; margin-right: 10px;" />
<img src="gradio_api/file=dashboard.png" alt="dashboard" style="max-width: 35%;" />
"""
58
+
59
# Page layout: heading, one column per scraper (button + result box),
# a hidden "date" input, then the explanatory HTML and screenshots.
with gr.Blocks() as demo:

    # Heading row
    with gr.Row():
        gr.HTML(title)

    # One column per tool: trigger button above its output box
    with gr.Row():
        with gr.Column():
            homeworks_btn = gr.Button("Homeworks")
            homeworks_output = gr.Textbox(label="Homeworks Result", lines=5)

        with gr.Column():
            timetable_btn = gr.Button("Timetable")
            timetable_output = gr.Textbox(label="Timetable Result", lines=5)

    # Hidden textbox that supplies the `date` argument of both handlers
    with gr.Row():
        date = gr.Textbox(label="date", visible=False)

    # Project description and screenshots
    with gr.Row():
        gr.HTML(description)
        gr.HTML(images)

    # Wire each button to its fetch function
    homeworks_btn.click(
        fn=fetch_homework,
        inputs=[date],
        outputs=homeworks_output,
    )

    timetable_btn.click(
        fn=fetch_timetable,
        inputs=[date],
        outputs=timetable_output,
    )
89
+
90
# Start the web UI and expose the event handlers as MCP tools.
# Only the two screenshots referenced by the `images` HTML are whitelisted
# for Gradio's file route. The previous allowed_paths=["/"] allowed Gradio
# to serve any file the server process can read. The paths are resolved
# against the current working directory, matching how the relative
# "gradio_api/file=<name>" URLs are resolved.
demo.launch(
    mcp_server=True,
    allowed_paths=[
        os.path.abspath("login.png"),
        os.path.abspath("dashboard.png"),
    ],
)
dashboard.png ADDED
login.png ADDED
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio[mcp]
2
+ python-dotenv
3
+ # pytest-playwright
4
+ # playwright
scrape_fake_app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from playwright.sync_api import sync_playwright
2
+ import os
3
+ from dotenv import load_dotenv
4
+ import json
5
+
6
def load_credentials() -> tuple:
    """Read the fake app's URL and login credentials from the environment.

    load_dotenv() is called first so that values defined in a ``.env`` file
    can be picked up as environment variables.

    Returns:
        A ``(url, username, password)`` tuple read from ``FAKE_APP_URL``,
        ``FAKE_APP_USERNAME`` and ``FAKE_APP_PASSWORD``. An element is
        ``None`` when its variable is not set.
    """
    load_dotenv()
    url = os.getenv('FAKE_APP_URL')
    username = os.getenv('FAKE_APP_USERNAME')
    password = os.getenv('FAKE_APP_PASSWORD')
    return (url, username, password)
12
+
13
def extract_homework_text(page) -> str:
    """Render the dashboard's "homework" card as plain text.

    Args:
        page: object exposing the Playwright locator API used here
            (``get_by_title``, ``locator``, ``all``, ``inner_text``).

    Returns:
        A "Homework:" header followed, for every section of the card, by
        its ``h3`` heading, one space-prefixed line per list item and a
        blank separator line; surrounding whitespace is stripped.
    """
    homework_card = page.get_by_title("homework")
    lines = ["Homework:\n"]

    for block in homework_card.locator("[data-slot='card-content'] section > div").all():
        lines.append(block.locator("h3").inner_text())
        # inner_text() keeps the full visible text of the list item
        lines.extend(
            f" {entry.inner_text().strip()}"
            for entry in block.locator("ul > li").all()
        )
        lines.append("")  # blank line between sections

    return "\n".join(lines).strip()
29
+
30
def extract_timetable_text(page):
    """Render the dashboard's "timetable" card as plain text.

    Args:
        page: object exposing the Playwright locator API used here
            (``get_by_title``, ``locator``, ``all``, ``count``,
            ``inner_text``).

    Returns:
        A "Timetable:" header followed by one line per list item of the
        card; surrounding whitespace is stripped.
    """
    timetable_card = page.get_by_title("timetable")
    rows = timetable_card.locator("[data-slot='card-content'] ul > li").all()

    def render(row):
        spans = row.locator("span")
        # Items without <span> children are plain text, e.g. "Lunch break"
        if spans.count() == 0:
            return row.inner_text().strip()
        return " ".join(span.inner_text().strip() for span in spans.all())

    return "\n".join(["Timetable:\n", *map(render, rows)]).strip()
45
+
46
+
47
+ # print(URL,USERNAME,PASSWORD)
48
+ def get_homework() -> str:
49
+ try:
50
+ URL,USERNAME,PASSWORD=load_credentials()
51
+
52
+ with sync_playwright() as playwright:
53
+ browser = playwright.firefox.launch(headless=True)
54
+ page = browser.new_page()
55
+ page.goto(URL,wait_until="domcontentloaded")
56
+
57
+ page.get_by_role('textbox',name='username').fill(USERNAME)
58
+ page.get_by_role('textbox',name='password').fill(PASSWORD)
59
+ page.get_by_role('button',name='login').click()
60
+ page.wait_for_url("**/dashboard")
61
+ # page.wait_for_timeout(1000)
62
+ homework = extract_homework_text(page)
63
+ browser.close()
64
+ return homework
65
+
66
+ except Exception as e:
67
+ return json.dumps(e)
68
+
69
+ def get_timetable() -> str:
70
+ try:
71
+ URL,USERNAME,PASSWORD=load_credentials()
72
+
73
+ with sync_playwright() as playwright:
74
+ browser = playwright.firefox.launch(headless=True)
75
+ page = browser.new_page()
76
+ page.goto(URL,wait_until="domcontentloaded")
77
+
78
+ page.get_by_role('textbox',name='username').fill(USERNAME)
79
+ page.get_by_role('textbox',name='password').fill(PASSWORD)
80
+ page.get_by_role('button',name='login').click()
81
+ page.wait_for_url("**/dashboard")
82
+ # page.wait_for_timeout(1000)
83
+ timetable = extract_timetable_text(page)
84
+ browser.close()
85
+ return timetable
86
+
87
+ except Exception as e:
88
+ return json.dumps(e)
89
+
90
if __name__ == "__main__":
    # Manual smoke test: print the homework, a blank line, then the timetable.
    for position, fetch in enumerate((get_homework, get_timetable)):
        if position:
            print()
        print(fetch())