Upload 7 files
Browse files- Dockerfile +51 -0
- README.md +6 -5
- app.py +90 -0
- dashboard.png +0 -0
- login.png +0 -0
- requirements.txt +4 -0
- scrape_fake_app.py +93 -0
Dockerfile
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10-slim

# System libraries required by the headless browsers that Playwright drives.
RUN apt-get update && apt-get install -y \
    libnss3 \
    libnspr4 \
    libatk1.0-0 \
    libatk-bridge2.0-0 \
    libcups2 \
    libatspi2.0-0 \
    libxcomposite1 \
    libxdamage1 \
    libxrandr2 \
    libgbm-dev \
    libgtk-3-0 \
    xdg-utils \
    libasound2 \
    mc \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

RUN pip install --no-cache-dir playwright
# Only Firefox is launched by the scraper. Install just its OS-level
# dependencies while still root; the browser binary itself is downloaded
# further down as the runtime user so it lands in that user's cache.
RUN playwright install-deps firefox


RUN useradd -m -u 1000 user
USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces

# $HOME is only substituted with the value above when it is referenced from a
# later instruction, so PYTHONPATH gets its own ENV line.
ENV PYTHONPATH=$HOME/app

# Set the working directory to the user's home directory
WORKDIR $HOME/app

RUN playwright install firefox

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

CMD ["python", "app.py"]
|
README.md
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
---
|
2 |
title: Fake App Scraper
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
|
|
7 |
pinned: false
|
8 |
license: mit
|
9 |
-
short_description:
|
|
|
10 |
---
|
11 |
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: Fake App Scraper
|
3 |
+
emoji: 📈
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: pink
|
6 |
sdk: docker
|
7 |
+
app_file: app.py
|
8 |
pinned: false
|
9 |
license: mit
|
10 |
+
short_description: uses playwright to scrape a fake app hosted on vercel
|
11 |
+
tags: ["mcp-server-track"]
|
12 |
---
|
13 |
|
|
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from scrape_fake_app import get_homework,get_timetable
|
3 |
+
import subprocess
|
4 |
+
import os
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
|
7 |
+
# When ENVIRONMENT is set to PROD, download the Firefox build Playwright needs
# before the UI starts.
if os.getenv('ENVIRONMENT') == "PROD":
    print("installing playwright firefox")
    install = subprocess.run(["playwright", "install", "firefox"])
    if install.returncode != 0:
        # Best effort: keep starting the app, but make the failure visible in
        # the logs instead of silently ignoring the exit status.
        print(f"playwright install firefox failed with exit code {install.returncode}")
|
10 |
+
|
11 |
+
def fetch_homework(date:str='today') -> str:
    """
    description:
    fetch the homeworks.
    Args:
    date: any string, default "today"
    Returns:
    The string describing the homeworks
    """
    # NOTE(review): `date` is accepted but not forwarded; get_homework() is
    # called without arguments, so the result does not depend on it.
    return get_homework()
|
21 |
+
|
22 |
+
|
23 |
+
def fetch_timetable(date:str='today') -> str:
    """
    description:
    fetch the timetable
    Args:
    date: any string, default "today"
    Returns:
    The string describing the timetable
    """
    # NOTE(review): `date` is accepted but not forwarded; get_timetable() is
    # called without arguments, so the result does not depend on it.
    return get_timetable()
|
33 |
+
|
34 |
+
|
35 |
+
title="<h2>Gradio MCP Hackathon: fake-app-scraper</h2>"
|
36 |
+
description="""<div style="font-family: sans-serif; line-height: 1.6;">
|
37 |
+
<p>
|
38 |
+
This app uses Playwright to log in and scrape the content of the dashboard of the fake app
|
39 |
+
<a href="https://fake-app-omega.vercel.app" target="_blank">fake-app-omega.vercel.app</a>.
|
40 |
+
</p>
|
41 |
+
<p>
|
42 |
+
The starting point was to provide an LLM-friendly, API-fied version of a real app that does not provide any API. Used as an MCP server, any user of the app could simply ask their AI assistant to fetch information from their dashboard.
|
43 |
+
</p>
|
44 |
+
<h3>Problem</h3>
|
45 |
+
<p>
|
46 |
+
When run locally, the app can take the credentials as environment variables to log into the user's dashboard. However, when hosted as a Hugging Face Space, I couldn't find a way to send the credentials securely—i.e., without explicitly providing them to the LLM.
|
47 |
+
</p>
|
48 |
+
<p>
|
49 |
+
In conclusion, as it stands, this app together with the fake Next.js app only serves demonstration or educational purposes and does not solve a real-life problem.
|
50 |
+
</p>
|
51 |
+
<p><strong>I’d be happy to get any suggestions on how to send credentials in the context of a Gradio HF Space–hosted app.</strong></p>
|
52 |
+
</div>
|
53 |
+
"""
|
54 |
+
|
55 |
+
images="""<img src="gradio_api/file=login.png" alt="login" style="max-width: 35%; margin-right: 10px;" />
|
56 |
+
<img src="gradio_api/file=dashboard.png" alt="dashboard" style="max-width: 35%;" />
|
57 |
+
"""
|
58 |
+
|
59 |
+
# Build the UI: one button/textbox pair per scraper, a hidden "date" input that
# is passed to both handlers, and the explanatory HTML below.
with gr.Blocks() as demo:

    # Add title and markdown
    with gr.Row():
        gr.HTML(title)

    with gr.Row():
        with gr.Column():
            homeworks_btn = gr.Button("Homeworks")
            homeworks_output = gr.Textbox(label="Homeworks Result", lines=5)

        with gr.Column():
            timetable_btn = gr.Button("Timetable")
            timetable_output = gr.Textbox(label="Timetable Result", lines=5)

    with gr.Row():
        date = gr.Textbox(label="date", visible=False)

    with gr.Row():
        gr.HTML(description)
        gr.HTML(images)

    homeworks_btn.click(fn=fetch_homework,
                        inputs=[date],
                        outputs=homeworks_output)

    timetable_btn.click(fn=fetch_timetable,
                        inputs=[date],
                        outputs=timetable_output)

# Only the two screenshots embedded in the page need to be served. Allowing "/"
# would let any visitor download arbitrary files readable by the process.
demo.launch(
    mcp_server=True,
    allowed_paths=[os.path.abspath("login.png"), os.path.abspath("dashboard.png")],
)
|
dashboard.png
ADDED
![]() |
login.png
ADDED
![]() |
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio[mcp]
|
2 |
+
python-dotenv
|
3 |
+
# pytest-playwright
|
4 |
+
# playwright
|
scrape_fake_app.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from playwright.sync_api import sync_playwright
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import json
|
5 |
+
|
6 |
+
def load_credentials() -> tuple:
    """Read the fake-app connection settings from the environment.

    ``load_dotenv()`` is called first, then the three variables are read with
    ``os.getenv``.

    Returns:
        A ``(url, username, password)`` tuple taken from ``FAKE_APP_URL``,
        ``FAKE_APP_USERNAME`` and ``FAKE_APP_PASSWORD``. An entry is ``None``
        when the corresponding variable is not set.
    """
    load_dotenv()
    return (
        os.getenv('FAKE_APP_URL'),
        os.getenv('FAKE_APP_USERNAME'),
        os.getenv('FAKE_APP_PASSWORD'),
    )
|
12 |
+
|
13 |
+
def extract_homework_text(page) -> str:
    """Render the "homework" card of the given page as plain text.

    Args:
        page: object exposing ``get_by_title`` (a Playwright page in this file).

    Returns:
        A "Homework:" header followed, for each section of the card, by its
        ``h3`` heading, one line per list item and a blank separator line;
        leading and trailing whitespace of the whole text is stripped.
    """
    homework_card = page.get_by_title("homework")

    lines = ["Homework:\n"]
    for block in homework_card.locator("[data-slot='card-content'] section > div").all():
        lines.append(block.locator("h3").inner_text())
        # Full inner text of every list entry, including formatting
        lines.extend(
            f" {entry.inner_text().strip()}"
            for entry in block.locator("ul > li").all()
        )
        lines.append("")  # blank line between sections

    return "\n".join(lines).strip()
|
29 |
+
|
30 |
+
def extract_timetable_text(page) -> str:
    """Render the "timetable" card of the given page as plain text.

    Args:
        page: object exposing ``get_by_title`` (a Playwright page in this file).

    Returns:
        A "Timetable:" header followed by one line per list item. Items that
        contain ``span`` elements are rendered as the span texts joined by a
        single space; other items use their own inner text.
    """
    card = page.get_by_title("timetable")
    items = card.locator("[data-slot='card-content'] ul > li").all()

    output = ["Timetable:\n"]
    for item in items:
        # Build the span locator once and reuse it for the check and the read.
        spans = item.locator("span")
        if spans.count() == 0:
            # Plain text item like "Lunch break"
            output.append(item.inner_text().strip())
        else:
            output.append(" ".join(part.inner_text().strip() for part in spans.all()))

    return "\n".join(output).strip()
|
45 |
+
|
46 |
+
|
47 |
+
# print(URL,USERNAME,PASSWORD)
|
48 |
+
def _scrape_dashboard(extract) -> str:
    """Log into the fake app and return ``extract(page)`` for the dashboard.

    Args:
        extract: callable taking the logged-in page and returning the text to
            hand back to the caller.

    Returns:
        The extractor's result, or a JSON object string of the form
        ``{"error": "<ExceptionType>: <message>"}`` if anything fails.
    """
    try:
        URL, USERNAME, PASSWORD = load_credentials()

        with sync_playwright() as playwright:
            browser = playwright.firefox.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(URL, wait_until="domcontentloaded")

                page.get_by_role('textbox', name='username').fill(USERNAME)
                page.get_by_role('textbox', name='password').fill(PASSWORD)
                page.get_by_role('button', name='login').click()
                page.wait_for_url("**/dashboard")
                return extract(page)
            finally:
                # Close the browser on the failure path as well.
                browser.close()

    except Exception as e:
        # Exception objects are not JSON serializable; json.dumps(e) would raise
        # TypeError from inside this handler, so serialize a description.
        # NOTE(review): the message may include Playwright call logs - confirm
        # they never echo the filled-in credentials before exposing to an LLM.
        return json.dumps({"error": f"{type(e).__name__}: {e}"})


def get_homework() -> str:
    """Return the dashboard's homework text, or a JSON error string on failure."""
    return _scrape_dashboard(extract_homework_text)


def get_timetable() -> str:
    """Return the dashboard's timetable text, or a JSON error string on failure."""
    return _scrape_dashboard(extract_timetable_text)
|
89 |
+
|
90 |
+
if __name__ == "__main__":
    # Manual smoke test: print both scraped texts separated by one blank line.
    print(get_homework(), end="\n\n")
    print(get_timetable())
|