CatPtain commited on
Commit
8f90209
·
verified ·
1 Parent(s): b2f3c3a

Upload 17 files

Browse files
Dockerfile CHANGED
@@ -1,50 +1,27 @@
1
- # Hugging Face Spaces Dockerfile - 修复版本
2
- FROM node:18-slim
3
-
4
- # Install dependencies for Puppeteer with updated Chrome repository
5
- RUN apt-get update \
6
- && apt-get install -y wget gnupg ca-certificates \
7
- && wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/googlechrome-linux-keyring.gpg \
8
- && sh -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/googlechrome-linux-keyring.gpg] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \
9
- && apt-get update \
10
- && apt-get install -y google-chrome-stable fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
11
- libdrm2 libgtk-3-0 libnspr4 libnss3 libx11-xcb1 libxcomposite1 libxdamage1 libxrandr2 \
12
- libgbm1 libxss1 libgconf-2-4 --no-install-recommends \
13
- && rm -rf /var/lib/apt/lists/*
14
-
15
- # Create a non-root user
16
- RUN groupadd -r pptruser && useradd -r -g pptruser -G audio,video pptruser \
17
- && mkdir -p /home/pptruser/Downloads \
18
- && chown -R pptruser:pptruser /home/pptruser
19
-
20
- # Set working directory
21
- WORKDIR /app
22
-
23
- # Copy package files first for better caching
24
- COPY package*.json ./
25
-
26
- # Install dependencies
27
- RUN npm ci --only=production && npm cache clean --force
28
-
29
- # Copy application code
30
- COPY . .
31
-
32
- # Change ownership to pptruser
33
- RUN chown -R pptruser:pptruser /app
34
-
35
- # Switch to non-root user
36
- USER pptruser
37
-
38
- # Set Puppeteer environment variables
39
- ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
40
- ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
41
-
42
- # Expose port for Hugging Face Spaces
43
- EXPOSE 7860
44
-
45
- # Set environment for HF Spaces
46
- ENV PORT=7860
47
- ENV NODE_ENV=production
48
-
49
- # Start the application
50
  CMD ["npm", "start"]
 
1
+ # Railway deployment configuration
2
+ FROM ghcr.io/puppeteer/puppeteer:21.5.2
3
+
4
+ # Set working directory
5
+ WORKDIR /usr/src/app
6
+
7
+ # Copy package files
8
+ COPY package*.json ./
9
+
10
+ # Install dependencies
11
+ RUN npm ci --only=production && npm cache clean --force
12
+
13
+ # Copy application code
14
+ COPY . .
15
+
16
+ # Create user and set permissions
17
+ USER pptruser
18
+
19
+ # Expose port (Railway assigns PORT at runtime; PORT is not declared in this Dockerfile, so 7860 is a build-time fallback - TODO confirm it matches the server's default port)
20
+ EXPOSE ${PORT:-7860}
21
+
22
+ # Health check (uses the same fallback so the probe still targets a port when PORT is not injected)
23
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
24
+ CMD curl -f http://localhost:${PORT:-7860}/ || exit 1
25
+
26
+ # Start the application
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  CMD ["npm", "start"]
hf-deploy/Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lightweight Dockerfile for HF Spaces, built on the official Puppeteer image
2
+ FROM ghcr.io/puppeteer/puppeteer:21.5.2
3
+
4
+ # Switch to root so packages can be installed
5
+ USER root
6
+
7
+ # Install additional fonts and dependencies, then drop the apt lists to keep the layer small
8
+ RUN apt-get update && apt-get install -y \
9
+ fonts-liberation \
10
+ fonts-dejavu-core \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
13
+ # Set the working directory
14
+ WORKDIR /usr/src/app
15
+
16
+ # Copy the package manifests first so the dependency layer can be cached
17
+ COPY package*.json ./
18
+
19
+ # Install production dependencies only
20
+ RUN npm ci --only=production && npm cache clean --force
21
+
22
+ # Copy the application code
23
+ COPY . .
24
+
25
+ # Switch back to the non-root user (files copied above were written while USER was root - NOTE(review): confirm the app never needs to write here)
26
+ USER pptruser
27
+
28
+ # Set environment variables (NOTE(review): assumes the base image ships Chrome at this path - confirm)
29
+ ENV NODE_ENV=production
30
+ ENV PORT=7860
31
+ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
32
+
33
+ # Expose the port (same value as PORT above)
34
+ EXPOSE 7860
35
+
36
+ # Start command
37
+ CMD ["npm", "start"]
hf-deploy/README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Page Screenshot API
3
+ emoji: 📸
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # Page Screenshot API
12
+
13
+ A web service that captures screenshots of web pages using Puppeteer, optimized for Hugging Face Spaces.
14
+
15
+ ## Features
16
+ - Web page screenshot capture
17
+ - Customizable dimensions (width/height)
18
+ - Adjustable image quality
19
+ - Rate limiting for API protection
20
+ - CORS enabled for cross-origin requests
21
+ - Interactive demo interface
22
+
23
+ ## Live Demo
24
+ Visit `/demo` to try the interactive screenshot tool!
25
+
26
+ ## API Usage
27
+
28
+ ### POST /screenshot
29
+ ```json
30
+ {
31
+ "url": "https://example.com",
32
+ "width": 1280,
33
+ "height": 720,
34
+ "quality": 75
35
+ }
36
+ ```
37
+
38
+ **HF Spaces Limits:**
39
+ - Width: 100-1600px
40
+ - Height: 100-1200px
41
+ - Timeout: 15 seconds
42
+ - Rate limit: 30 requests/15min
43
+
44
+ ### GET /
45
+ Health check endpoint
46
+
47
+ ### GET /demo
48
+ Interactive demo interface
49
+
50
+ ## Example Usage
51
+ ```bash
52
+ curl -X POST https://<your-space>.hf.space/screenshot \
53
+ -H "Content-Type: application/json" \
54
+ -d '{"url": "https://google.com", "width": 1280, "height": 720}' \
55
+ --output screenshot.jpg
56
+ ```
hf-deploy/package.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "page-screenshot-api",
3
+ "version": "1.0.0",
4
+ "description": "Web page screenshot API service",
5
+ "main": "server.js",
6
+ "scripts": {
7
+ "start": "node server.js",
8
+ "dev": "nodemon server.js",
9
+ "build": "echo 'No build step required'"
10
+ },
11
+ "keywords": ["screenshot", "api", "puppeteer"],
12
+ "author": "",
13
+ "license": "MIT",
14
+ "dependencies": {
15
+ "express": "^4.18.2",
16
+ "puppeteer": "^21.5.2",
17
+ "cors": "^2.8.5",
18
+ "helmet": "^7.1.0",
19
+ "express-rate-limit": "^7.1.5"
20
+ },
21
+ "devDependencies": {
22
+ "nodemon": "^3.0.2"
23
+ },
24
+ "engines": {
25
+ "node": ">=18.0.0"
26
+ },
27
+ "puppeteer": {
28
+ "skipDownload": true
29
+ }
30
+ }
hf-deploy/server.js ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const express = require('express');
2
+ const puppeteer = require('puppeteer');
3
+ const cors = require('cors');
4
+ const helmet = require('helmet');
5
+ const rateLimit = require('express-rate-limit');
6
+
7
+ const app = express();
8
+ const PORT = process.env.PORT || 7860;
9
+
10
+ // Middleware configuration - tuned for HF Spaces: security headers, CORS for all origins, JSON body parsing
11
+ app.use(helmet({
12
+ contentSecurityPolicy: false // required on HF Spaces
13
+ }));
14
+ app.use(cors());
15
+ app.use(express.json({ limit: '10mb' }));
16
+
17
+ // Rate limiting - adjusted for HF Spaces. NOTE(review): if the app runs behind a reverse proxy, the limiter keys on the proxy address unless Express `trust proxy` is configured - confirm
18
+ const limiter = rateLimit({
19
+ windowMs: 15 * 60 * 1000, // 15-minute window
20
+ max: 30, // lowered further: at most 30 requests per window
21
+ message: {
22
+ error: 'Too many requests, please try again later.'
23
+ }
24
+ });
25
+ app.use('/screenshot', limiter); // only the screenshot endpoint is rate limited
26
+
27
+ // Health check endpoint: returns a static JSON document describing the service and its routes
28
+ app.get('/', (req, res) => {
29
+ res.json({
30
+ message: 'Page Screenshot API - Hugging Face Spaces',
31
+ version: '1.0.0',
32
+ status: 'running',
33
+ platform: 'HuggingFace Spaces',
34
+ endpoints: {
35
+ screenshot: 'POST /screenshot',
36
+ demo: 'GET /demo',
37
+ health: 'GET /'
38
+ }
39
+ });
40
+ });
41
+
42
+ // 截图API端点 - 增强错误处理
43
+ app.post('/screenshot', async (req, res) => {
44
+ const { url, width = 1280, height = 720, quality = 75 } = req.body;
45
+
46
+ // 参数验证
47
+ if (!url) {
48
+ return res.status(400).json({
49
+ error: 'URL is required',
50
+ example: { url: 'https://example.com', width: 1280, height: 720 }
51
+ });
52
+ }
53
+
54
+ // URL格式验证
55
+ try {
56
+ const urlObj = new URL(url);
57
+ // 检查协议
58
+ if (!['http:', 'https:'].includes(urlObj.protocol)) {
59
+ return res.status(400).json({
60
+ error: 'Only HTTP and HTTPS URLs are supported'
61
+ });
62
+ }
63
+ } catch (error) {
64
+ return res.status(400).json({
65
+ error: 'Invalid URL format'
66
+ });
67
+ }
68
+
69
+ // 分辨率验证 - HF Spaces 更严格限制
70
+ if (width < 100 || width > 1600 || height < 100 || height > 1200) {
71
+ return res.status(400).json({
72
+ error: 'Width must be 100-1600px, height must be 100-1200px for HF Spaces'
73
+ });
74
+ }
75
+
76
+ let browser;
77
+ try {
78
+ // 启动浏览器 - HF Spaces 专用配置
79
+ const browserOptions = {
80
+ headless: 'new',
81
+ args: [
82
+ '--no-sandbox',
83
+ '--disable-setuid-sandbox',
84
+ '--disable-dev-shm-usage',
85
+ '--disable-gpu',
86
+ '--no-first-run',
87
+ '--no-zygote',
88
+ '--single-process',
89
+ '--disable-extensions',
90
+ '--disable-background-timer-throttling',
91
+ '--disable-backgrounding-occluded-windows',
92
+ '--disable-renderer-backgrounding',
93
+ '--disable-features=TranslateUI',
94
+ '--disable-default-apps',
95
+ '--no-default-browser-check',
96
+ '--disable-background-networking'
97
+ ]
98
+ };
99
+
100
+ // 在 HF Spaces 中使用系统 Chrome
101
+ if (process.env.PUPPETEER_EXECUTABLE_PATH) {
102
+ browserOptions.executablePath = process.env.PUPPETEER_EXECUTABLE_PATH;
103
+ }
104
+
105
+ console.log('Launching browser...');
106
+ browser = await puppeteer.launch(browserOptions);
107
+
108
+ const page = await browser.newPage();
109
+
110
+ // 设置视窗大小
111
+ await page.setViewport({
112
+ width: parseInt(width),
113
+ height: parseInt(height),
114
+ deviceScaleFactor: 1
115
+ });
116
+
117
+ // 设置用户代理和其他页面选项
118
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
119
+
120
+ // 拦截不必要的资源以提高性能
121
+ await page.setRequestInterception(true);
122
+ page.on('request', (req) => {
123
+ const resourceType = req.resourceType();
124
+ if (['font', 'media'].includes(resourceType)) {
125
+ req.abort();
126
+ } else {
127
+ req.continue();
128
+ }
129
+ });
130
+
131
+ console.log(`Navigating to: ${url}`);
132
+
133
+ // 访问页面 - HF Spaces 更短超时
134
+ await page.goto(url, {
135
+ waitUntil: 'domcontentloaded', // 更快的等待条件
136
+ timeout: 15000 // 15秒超时
137
+ });
138
+
139
+ // 等待页面稳定
140
+ await page.waitForTimeout(1000);
141
+
142
+ console.log('Taking screenshot...');
143
+
144
+ // 截图
145
+ const screenshot = await page.screenshot({
146
+ type: 'jpeg',
147
+ quality: Math.max(10, Math.min(100, parseInt(quality))),
148
+ fullPage: false
149
+ });
150
+
151
+ console.log(`Screenshot taken: ${screenshot.length} bytes`);
152
+
153
+ // 设置响应头
154
+ res.set({
155
+ 'Content-Type': 'image/jpeg',
156
+ 'Content-Length': screenshot.length,
157
+ 'Cache-Control': 'no-cache',
158
+ 'Content-Disposition': `inline; filename="screenshot-${Date.now()}.jpg"`
159
+ });
160
+
161
+ res.send(screenshot);
162
+
163
+ } catch (error) {
164
+ console.error('Screenshot error:', error.message);
165
+ const errorResponse = {
166
+ error: 'Failed to capture screenshot',
167
+ message: error.message
168
+ };
169
+
170
+ // 根据错误类型提供更好的错误信息
171
+ if (error.message.includes('timeout')) {
172
+ errorResponse.suggestion = 'Try a simpler webpage or reduce timeout';
173
+ } else if (error.message.includes('net::')) {
174
+ errorResponse.suggestion = 'Check if the URL is accessible';
175
+ }
176
+
177
+ res.status(500).json(errorResponse);
178
+ } finally {
179
+ if (browser) {
180
+ try {
181
+ await browser.close();
182
+ console.log('Browser closed');
183
+ } catch (closeError) {
184
+ console.error('Error closing browser:', closeError.message);
185
+ }
186
+ }
187
+ }
188
+ });
189
+
190
+ // HF Spaces demo page - improved version
191
+ app.get('/demo', (req, res) => {
192
+ res.send(`
193
+ <!DOCTYPE html>
194
+ <html>
195
+ <head>
196
+ <title>📸 Page Screenshot API Demo</title>
197
+ <meta charset="utf-8">
198
+ <meta name="viewport" content="width=device-width, initial-scale=1">
199
+ <style>
200
+ body {
201
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
202
+ max-width: 800px; margin: 0 auto; padding: 20px;
203
+ background: #f8f9fa;
204
+ }
205
+ .container { background: white; padding: 30px; border-radius: 10px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
206
+ .form-group { margin: 20px 0; }
207
+ label { display: block; margin-bottom: 8px; font-weight: 600; color: #333; }
208
+ input[type="text"], input[type="number"] {
209
+ width: 100%; padding: 12px; border: 2px solid #e1e5e9;
210
+ border-radius: 6px; font-size: 16px; box-sizing: border-box;
211
+ }
212
+ input:focus { border-color: #007bff; outline: none; }
213
+ .input-row { display: flex; gap: 15px; }
214
+ .input-row > div { flex: 1; }
215
+ button {
216
+ background: linear-gradient(135deg, #007bff, #0056b3);
217
+ color: white; border: none; padding: 14px 28px;
218
+ border-radius: 6px; cursor: pointer; font-size: 16px; font-weight: 600;
219
+ transition: transform 0.2s;
220
+ }
221
+ button:hover { transform: translateY(-1px); }
222
+ button:disabled { background: #6c757d; cursor: not-allowed; transform: none; }
223
+ #result { margin-top: 30px; }
224
+ .loading { color: #007bff; font-weight: 500; }
225
+ .error { color: #dc3545; background: #f8d7da; padding: 15px; border-radius: 6px; }
226
+ .success img { max-width: 100%; border-radius: 6px; box-shadow: 0 4px 15px rgba(0,0,0,0.1); }
227
+ .examples { margin: 20px 0; }
228
+ .example-btn {
229
+ background: #e9ecef; color: #495057; border: none;
230
+ padding: 8px 12px; margin: 5px; border-radius: 4px; cursor: pointer; font-size: 14px;
231
+ }
232
+ .example-btn:hover { background: #dee2e6; }
233
+ </style>
234
+ </head>
235
+ <body>
236
+ <div class="container">
237
+ <h1>📸 Page Screenshot API</h1>
238
+ <p>Enter a URL to capture a screenshot. Optimized for Hugging Face Spaces.</p>
239
+
240
+ <div class="examples">
241
+ <strong>Try these examples:</strong><br>
242
+ <button class="example-btn" onclick="setExample('https://www.google.com')">Google</button>
243
+ <button class="example-btn" onclick="setExample('https://www.github.com')">GitHub</button>
244
+ <button class="example-btn" onclick="setExample('https://www.wikipedia.org')">Wikipedia</button>
245
+ <button class="example-btn" onclick="setExample('https://news.ycombinator.com')">Hacker News</button>
246
+ </div>
247
+
248
+ <div class="form-group">
249
+ <label for="url">URL:</label>
250
+ <input type="text" id="url" placeholder="https://example.com" value="https://www.google.com">
251
+ </div>
252
+
253
+ <div class="input-row">
254
+ <div>
255
+ <label for="width">Width (px):</label>
256
+ <input type="number" id="width" value="1280" min="100" max="1600">
257
+ </div>
258
+ <div>
259
+ <label for="height">Height (px):</label>
260
+ <input type="number" id="height" value="720" min="100" max="1200">
261
+ </div>
262
+ <div>
263
+ <label for="quality">Quality:</label>
264
+ <input type="number" id="quality" value="75" min="10" max="100">
265
+ </div>
266
+ </div>
267
+
268
+ <button onclick="takeScreenshot()" id="captureBtn">Take Screenshot</button>
269
+
270
+ <div id="result"></div>
271
+ </div>
272
+
273
+ <script>
274
+ function setExample(url) {
275
+ document.getElementById('url').value = url;
276
+ }
277
+
278
+ async function takeScreenshot() {
279
+ const url = document.getElementById('url').value;
280
+ const width = parseInt(document.getElementById('width').value);
281
+ const height = parseInt(document.getElementById('height').value);
282
+ const quality = parseInt(document.getElementById('quality').value);
283
+ const btn = document.getElementById('captureBtn');
284
+
285
+ if (!url) {
286
+ document.getElementById('result').innerHTML = '<div class="error">Please enter a URL</div>';
287
+ return;
288
+ }
289
+
290
+ btn.disabled = true;
291
+ btn.textContent = 'Taking Screenshot...';
292
+ document.getElementById('result').innerHTML = '<div class="loading">📸 Capturing screenshot, please wait...</div>';
293
+
294
+ try {
295
+ const response = await fetch('/screenshot', {
296
+ method: 'POST',
297
+ headers: {
298
+ 'Content-Type': 'application/json',
299
+ },
300
+ body: JSON.stringify({ url, width, height, quality })
301
+ });
302
+
303
+ if (response.ok) {
304
+ const blob = await response.blob();
305
+ const imageUrl = URL.createObjectURL(blob);
306
+ const size = (blob.size / 1024).toFixed(1);
307
+ document.getElementById('result').innerHTML =
308
+ '<div class="success"><h3>Screenshot Result:</h3>' +
309
+ '<p>Size: ' + size + ' KB | Dimensions: ' + width + 'x' + height + '</p>' +
310
+ '<img src="' + imageUrl + '" alt="Screenshot"><br><br>' +
311
+ '<a href="' + imageUrl + '" download="screenshot.jpg" style="background: #28a745; color: white; padding: 10px 20px; text-decoration: none; border-radius: 5px;">Download Image</a></div>';
312
+ } else {
313
+ const error = await response.json();
314
+ document.getElementById('result').innerHTML =
315
+ '<div class="error"><strong>Error:</strong> ' + error.error +
316
+ (error.suggestion ? '<br><strong>Suggestion:</strong> ' + error.suggestion : '') + '</div>';
317
+ }
318
+ } catch (error) {
319
+ document.getElementById('result').innerHTML =
320
+ '<div class="error"><strong>Network Error:</strong> ' + error.message + '</div>';
321
+ } finally {
322
+ btn.disabled = false;
323
+ btn.textContent = 'Take Screenshot';
324
+ }
325
+ }
326
+
327
+ // Enter key support
328
+ document.getElementById('url').addEventListener('keypress', function(e) {
329
+ if (e.key === 'Enter') {
330
+ takeScreenshot();
331
+ }
332
+ });
333
+ </script>
334
+ </body>
335
+ </html>
336
+ `);
337
+ });
338
+
339
+ // Error-handling middleware
340
+ app.use((error, req, res, next) => {
341
+ console.error('Unhandled error:', error);
342
+ res.status(500).json({
343
+ error: 'Internal server error'
344
+ });
345
+ });
346
+
347
+ // 404 handler
348
+ app.use((req, res) => {
349
+ res.status(404).json({
350
+ error: 'Endpoint not found'
351
+ });
352
+ });
353
+
354
+ app.listen(PORT, '0.0.0.0', () => {
355
+ console.log(`Screenshot API server running on port ${PORT} for Hugging Face Spaces`);
356
+ });