# proxy / app.py
# aigenai's picture
# Update app.py
# 59bddc7 verified
# raw
# history blame
# 12.5 kB
import logging
import re
import threading
import time
from dataclasses import dataclass
from datetime import datetime
from functools import lru_cache
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin, urlparse

import gradio as gr
import requests
# Configure root logging once at import time: INFO and above, with a
# "timestamp - level - message" line layout.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
@dataclass
class ProxyResponse:
    """Result of one upstream fetch performed by the proxy.

    Attributes:
        status: HTTP status code (upstream status, or 504/500 for local errors).
        content: Response payload. This is ``str`` for text-like content types,
            ``bytes`` for binary ones, and the live ``requests.Response``
            object when the fetch was made with ``stream=True``.
        headers: Upstream response headers (empty on local errors).
        redirect_url: Target of an upstream redirect, if any.
        error: Human-readable error message, or ``None`` on success.
    """

    status: int
    # Annotated as Any because the payload is str, bytes or a Response object
    # depending on how the fetch was made; the former ``str`` was inaccurate.
    content: Any
    headers: Dict[str, str]
    redirect_url: Optional[str] = None
    error: Optional[str] = None
class Config:
    """Static configuration for the proxy (read as class attributes)."""

    ASSET_URL = "https://1pages.nbid.bid/"
    # Prefix prepended to redirect targets that should go back through the proxy.
    PREFIX = "/"
    # Truthy value makes blob URLs redirect to jsDelivr instead of being fetched.
    JSDELIVR = 0
    CACHE_TTL = 3600
    MAX_RETRIES = 3
    # Timeout passed to each upstream request.
    TIMEOUT = 10

    RATE_LIMIT = {
        "window_ms": 15 * 60 * 1000,  # 15 minutes
        "max": 100  # at most 100 requests per IP within the window
    }

    # Whitelist of substrings; an empty list means "allow everything".
    WHITE_LIST: List[str] = []

    # Default headers sent with every upstream request.
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # CORS settings.
    CORS = {
        "allow_origins": ["*"],
        "allow_methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["*"],
        "max_age": 1728000
    }

    # URL patterns a request must match to be proxied. Every pattern is
    # anchored with ^...$ and allows the scheme to be omitted.
    PATTERNS = {
        "releases": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:releases|archive)\/.*$",
        "blob": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:blob|raw)\/.*$",
        "git": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:info|git-).*$",
        "raw": r"^(?:https?:\/\/)?raw\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+?\/.+$",
        "gist": r"^(?:https?:\/\/)?gist\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+$",
        "tags": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/tags.*$"
    }
class RateLimiter:
    """Sliding-window request limiter keyed by client IP.

    Each IP maps to the list of request timestamps (milliseconds since the
    epoch) that still fall inside the current window.

    Args:
        max_requests: Requests allowed per window. ``None`` (the default)
            reads ``Config.RATE_LIMIT["max"]`` on every call.
        window_ms: Window length in milliseconds. ``None`` (the default)
            reads ``Config.RATE_LIMIT["window_ms"]`` on every call.
    """

    def __init__(self, max_requests: Optional[int] = None, window_ms: Optional[int] = None):
        self._max_requests = max_requests
        self._window_ms = window_ms
        self.request_records: Dict[str, List[float]] = {}

    def _limit(self) -> int:
        """Return the effective per-window request limit."""
        if self._max_requests is None:
            return Config.RATE_LIMIT["max"]
        return self._max_requests

    def _window(self) -> int:
        """Return the effective window length in milliseconds."""
        if self._window_ms is None:
            return Config.RATE_LIMIT["window_ms"]
        return self._window_ms

    def _live_records(self, ip: str, now: float) -> List[float]:
        """Drop expired timestamps for ``ip`` and return the ones still in the window."""
        window_start = now - self._window()
        live = [t for t in self.request_records.get(ip, []) if t > window_start]
        if live:
            self.request_records[ip] = live
        else:
            # Forget idle clients so the table does not grow without bound.
            self.request_records.pop(ip, None)
        return live

    def is_allowed(self, ip: str) -> bool:
        """Record a request from ``ip`` and report whether it is within the limit.

        Rejected requests are not recorded, so they do not extend the block.
        """
        now = time.time() * 1000
        live = self._live_records(ip, now)
        if len(live) >= self._limit():
            return False
        live.append(now)
        self.request_records[ip] = live
        return True

    def get_remaining(self, ip: str) -> int:
        """Return how many more requests ``ip`` may make in the current window.

        Expired timestamps are pruned first, so a client that has been idle
        for a full window is reported with its full quota again.
        """
        live = self._live_records(ip, time.time() * 1000)
        return max(0, self._limit() - len(live))
class GitHubProxy:
    """Core of the GitHub proxy: URL validation, rate limiting and fetching.

    Successful, non-streamed GET/HEAD responses are kept in a small
    per-instance cache for ``Config.CACHE_TTL`` seconds. Error results and
    streamed responses are never cached: an error would otherwise be served
    for the lifetime of the process, and a streamed body can be read only once.
    """

    # Upper bound on cached responses; the oldest-inserted entry is evicted first.
    CACHE_MAX_ENTRIES = 1000

    def __init__(self):
        self.rate_limiter = RateLimiter()
        self.session = requests.Session()
        self.session.headers.update(Config.DEFAULT_HEADERS)
        # (url, METHOD) -> (monotonic expiry time, ProxyResponse)
        self._cache: Dict[tuple, tuple] = {}
        self._cache_lock = threading.Lock()

    def check_url(self, url: str) -> bool:
        """Return True if ``url`` matches any pattern in ``Config.PATTERNS``."""
        return any(re.search(pattern, url, re.I) for pattern in Config.PATTERNS.values())

    def check_white_list(self, url: str) -> bool:
        """Return True if the whitelist is empty or one of its items occurs in ``url``."""
        if not Config.WHITE_LIST:
            return True
        return any(white_item in url for white_item in Config.WHITE_LIST)

    def _cache_get(self, key: tuple) -> Optional[ProxyResponse]:
        """Return the unexpired cached response for ``key``, or None."""
        with self._cache_lock:
            entry = self._cache.get(key)
            if entry is None:
                return None
            expires_at, cached = entry
            if expires_at <= time.monotonic():
                del self._cache[key]
                return None
            return cached

    def _cache_put(self, key: tuple, value: ProxyResponse) -> None:
        """Store ``value`` under ``key``, evicting the oldest entries when full."""
        with self._cache_lock:
            # Remove first so a refreshed key moves to the newest position.
            self._cache.pop(key, None)
            while len(self._cache) >= self.CACHE_MAX_ENTRIES:
                self._cache.pop(next(iter(self._cache)))
            self._cache[key] = (time.monotonic() + Config.CACHE_TTL, value)

    def fetch_github_content(self, url: str, method: str = "GET", stream: bool = False) -> ProxyResponse:
        """Fetch ``url`` from upstream without following redirects.

        Args:
            url: Absolute URL to request.
            method: HTTP method.
            stream: When True, the body is not read; the returned
                ``ProxyResponse.content`` is the ``requests.Response`` object
                and the caller is responsible for consuming and closing it.

        Returns:
            A ``ProxyResponse``. Redirects are reported through ``redirect_url``
            with empty content. A timeout yields status 504 and any other
            exception yields status 500, both with ``error`` set; this method
            does not raise.
        """
        verb = method.upper()
        cache_key = (url, verb)
        cacheable = not stream and verb in ("GET", "HEAD")
        if cacheable:
            cached = self._cache_get(cache_key)
            if cached is not None:
                return cached

        try:
            response = self.session.request(
                method=method,
                url=url,
                timeout=Config.TIMEOUT,
                allow_redirects=False,
                stream=stream
            )
            headers = dict(response.headers)

            if response.is_redirect:
                # Location may be relative; resolve it against the requested URL.
                redirect_url = urljoin(url, response.headers["Location"])
                # The body of a redirect is not used; release the connection.
                response.close()
                if self.check_url(redirect_url):
                    redirect_url = Config.PREFIX + redirect_url
                result = ProxyResponse(
                    status=response.status_code,
                    content="",
                    headers=headers,
                    redirect_url=redirect_url
                )
            elif stream:
                # Hand the Response object back so the caller can stream the body.
                return ProxyResponse(
                    status=response.status_code,
                    content=response,
                    headers=headers
                )
            else:
                # Anything whose content type is not text-like is kept as bytes.
                content_type = response.headers.get('content-type', '')
                is_binary = not any(
                    text_type in content_type.lower()
                    for text_type in ['text', 'json', 'xml', 'html']
                )
                content = response.content if is_binary else response.text
                result = ProxyResponse(
                    status=response.status_code,
                    content=content,
                    headers=headers
                )
        except requests.Timeout:
            return ProxyResponse(
                status=504,
                content="Request Timeout",
                headers={},
                error="请求超时"
            )
        except Exception as e:
            logging.error(f"Fetch error: {str(e)}")
            return ProxyResponse(
                status=500,
                content=str(e),
                headers={},
                error="服务器内部错误"
            )

        # Only cache outcomes that are safe to replay (2xx/3xx).
        if cacheable and result.status < 400:
            self._cache_put(cache_key, result)
        return result

    def proxy_request(self, url: str, request: gr.Request) -> Dict[str, Any]:
        """Validate and execute one proxy request on behalf of a client.

        Checks run in this order: rate limit, whitelist, GitHub URL pattern,
        optional jsDelivr redirect, then the upstream fetch.

        Args:
            url: Target URL; ``https://`` is prepended when no scheme is given.
            request: Incoming Gradio request, used for the client address.

        Returns:
            A dict with ``status``, ``content`` and ``headers``, plus ``error``,
            ``redirect_url``, ``rate_limit`` or ``timestamp`` where applicable.
        """
        # The client address may be unavailable; such callers share one bucket.
        client = getattr(request, "client", None)
        client_ip = getattr(client, "host", None) or "unknown"

        logging.info(f"Proxy request from {client_ip} to {url}")

        # Rate limit
        if not self.rate_limiter.is_allowed(client_ip):
            return {
                "status": 429,
                "content": "Too Many Requests",
                "headers": {},
                "error": "请求过于频繁,请稍后再试",
                "rate_limit": {
                    "remaining": self.rate_limiter.get_remaining(client_ip),
                    "reset": int((time.time() * 1000 + Config.RATE_LIMIT["window_ms"]) / 1000)
                }
            }

        # Whitelist
        if not self.check_white_list(url):
            return {
                "status": 403,
                "content": "Access Denied",
                "headers": {},
                "error": "访问被拒绝"
            }

        # Normalise the URL
        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        # Only GitHub URLs are proxied
        if not self.check_url(url):
            return {
                "status": 400,
                "content": "Invalid GitHub URL",
                "headers": {},
                "error": "无效的GitHub URL"
            }

        # Optional jsDelivr redirect for blob URLs
        if Config.JSDELIVR and re.search(Config.PATTERNS["blob"], url):
            url = url.replace("/blob/", "@").replace("github.com", "cdn.jsdelivr.net/gh")
            return {
                "status": 302,
                "content": "",
                "headers": {"Location": url},
                "redirect_url": url
            }

        # Fetch the content
        response = self.fetch_github_content(url)
        result = {
            "status": response.status,
            "content": response.content,
            "headers": response.headers,
            "timestamp": datetime.now().isoformat()
        }
        if response.redirect_url:
            result["redirect_url"] = response.redirect_url
        if response.error:
            result["error"] = response.error
        return result
def create_interface():
    """Build the proxy UI and the direct-download route, and return the app.

    NOTE(review): ``gr.App``, ``app.add_route``, ``gr.Redirect`` and
    ``gr.FileBinaryResponse`` are used below exactly as in the original code,
    but they do not correspond to any public Gradio API I can confirm, and
    ``gr.Error`` is an exception that Gradio expects to be *raised*, not
    returned. Verify against the installed Gradio version; a custom route
    most likely needs a FastAPI app combined with ``gr.mount_gradio_app``.
    """
    proxy = GitHubProxy()

    def proxy_download(request: gr.Request):
        """Handle a direct proxy request whose target is given in the ``x-path`` header."""
        # Extract the GitHub URL from the path
        path = request.headers.get('x-path', '')
        if not path:
            return gr.Error("无效的请求路径")

        # Strip the leading slash
        if path.startswith('/'):
            path = path[1:]

        # Make sure the URL has a scheme
        if not path.startswith(('http://', 'https://')):
            path = 'https://' + path

        try:
            # Run the validation / rate-limit pipeline first
            response = proxy.proxy_request(path, request)
            if 'error' in response:
                return gr.Error(response['error'])
            if 'redirect_url' in response:
                return gr.Redirect(response['redirect_url'])

            # Fetch again in streaming mode for the actual file body.
            # NOTE(review): proxy_request above has already downloaded the
            # whole body once, so each download hits upstream twice.
            proxy_response = proxy.fetch_github_content(path, stream=True)
            if proxy_response.error:
                return gr.Error(proxy_response.error)

            # Build the file response
            headers = {
                'Content-Type': proxy_response.headers.get('content-type', 'application/octet-stream'),
                'Content-Disposition': proxy_response.headers.get('content-disposition', 'attachment'),
            }
            return gr.FileBinaryResponse(
                proxy_response.content.raw,
                headers=headers,
                status=proxy_response.status
            )
        except Exception as e:
            logging.error(f"Proxy error: {str(e)}")
            return gr.Error(f"代理请求失败: {str(e)}")

    # Register the direct proxy route
    app = gr.App()
    app.add_route("/.*", proxy_download, method="GET")

    with gr.Blocks(title="GitHub Proxy", theme=gr.themes.Soft()) as blocks:
        # The Markdown text is kept flush-left so the string is unchanged.
        gr.Markdown("""
# 🚀 GitHub Proxy
### 使用方法
1. 直接访问: `https://your-domain.com/github-url`
2. 或者在下方输入GitHub URL进行测试
### 功能特点
- ✨ 支持多种GitHub URL格式
- 🔄 自动处理重定向
- 💾 响应缓存
- ⚡ CDN加速支持
- 🛡️ 请求频率限制
### 支持的URL类型
- GitHub Release/Archive
- GitHub Raw/Blob
- GitHub Gist
- Raw GitHub Content
""")

        with gr.Row():
            url_input = gr.Textbox(
                label="GitHub URL",
                placeholder="输入GitHub URL,例如:github.com/user/repo/blob/master/file.txt",
                scale=4
            )
            submit_btn = gr.Button("获取内容", scale=1)

        with gr.Row():
            with gr.Column():
                status = gr.Textbox(label="状态")
                headers = gr.JSON(label="响应头")
            with gr.Column():
                content = gr.Textbox(label="内容", max_lines=20)
                error = gr.Textbox(label="错误信息", visible=False)

        def handle_request(url: str, request: gr.Request):
            """Run the proxy for the URL typed into the UI and map the result onto the widgets."""
            result = proxy.proxy_request(url, request)

            # The error box is shown only when the proxy reported an error.
            error_visible = "error" in result
            error_msg = result.get("error", "")

            # The original literal listed the ``error`` key twice; the first
            # entry was always overwritten, so only the effective one is kept.
            return {
                status: f"状态码: {result['status']}",
                headers: result["headers"],
                content: result["content"],
                error: gr.update(visible=error_visible, value=error_msg)
            }

        submit_btn.click(
            fn=handle_request,
            inputs=[url_input],
            outputs=[status, headers, content, error]
        )

        # Examples
        gr.Examples(
            examples=[
                ["github.com/microsoft/vscode/blob/main/README.md"],
                ["raw.githubusercontent.com/microsoft/vscode/main/README.md"],
                ["github.com/ollama/ollama/releases/download/v0.5.1/ollama-windows-amd64.zip"]
            ],
            inputs=url_input
        )

    app.blocks = blocks
    return app
# Script entry point: build the app and serve it on all interfaces, port 7860.
if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        quiet=False
    )