mistpe committed on
Commit
53f22b0
·
verified ·
1 Parent(s): 166ba46

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +364 -0
app.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import fnmatch
import json
import os
import shutil
import subprocess
import tarfile
import tempfile
import threading
import time
import uuid
import zipfile
from pathlib import Path
from urllib.parse import urlparse

import rarfile
import requests
from flask import Flask, render_template, request, jsonify, send_file
from werkzeug.utils import secure_filename
16
+
17
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'temp_repos'
app.config['OUTPUT_FOLDER'] = 'outputs'
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # 500MB

# File types accepted for upload: archives first, then individual source/text
# files. The compound entries ('tar.gz', ...) are listed for completeness;
# allowed_file() recognises them through an explicit suffix check.
ALLOWED_EXTENSIONS = {
    'zip', 'rar', '7z', 'tar', 'tar.gz', 'tgz', 'tar.bz2', 'tar.xz',
    'py', 'js', 'html', 'css', 'java', 'cpp', 'c', 'h', 'php', 'rb',
    'go', 'rs', 'ts', 'vue', 'jsx', 'tsx', 'md', 'txt', 'json', 'xml',
    'yaml', 'yml', 'ini', 'cfg', 'conf', 'sh', 'bat', 'ps1', 'sql'
}

# Make sure the working folders exist before any request is served.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['OUTPUT_FOLDER'], exist_ok=True)

# Progress of background jobs, keyed by session id. Worker functions write
# {'status': ..., 'progress': ...} or {'status': 'error', 'message': ...}
# entries; the /progress route reads them.
download_progress = {}
36
+
37
def allowed_file(filename):
    """Return True when *filename* has an extension accepted for upload.

    Compound archive suffixes (``.tar.gz``, ``.tar.bz2``, ``.tar.xz``) are
    accepted directly; every other name is judged by the text after its last
    dot, compared case-insensitively against ``ALLOWED_EXTENSIONS``. Names
    without a dot are rejected.
    """
    lowered = filename.lower()

    # Compound suffixes cannot be recognised from the last dot alone.
    if lowered.endswith(('.tar.gz', '.tar.bz2', '.tar.xz')):
        return True

    if '.' not in lowered:
        return False

    return lowered.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS
46
+
47
def _extract_tar_safely(tar_ref, extract_to):
    """Extract *tar_ref* into *extract_to* without letting members escape it."""
    if hasattr(tarfile, 'data_filter'):
        # This interpreter supports extraction filters; 'data' refuses
        # absolute paths, '..' escapes, outside-pointing links and devices.
        tar_ref.extractall(extract_to, filter='data')
        return

    # Older interpreter: validate member names by hand and extract only
    # regular files and directories (links and special files are skipped).
    root = os.path.realpath(extract_to)
    safe_members = []
    for member in tar_ref.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if target != root and not target.startswith(root + os.sep):
            raise Exception(f"压缩包包含非法路径: {member.name}")
        if member.isfile() or member.isdir():
            safe_members.append(member)
    tar_ref.extractall(extract_to, members=safe_members)


def extract_archive(file_path, extract_to, session_id):
    """Extract the archive at *file_path* into *extract_to*.

    The format is chosen from the (case-insensitive) file suffix: zip, rar,
    tar (plain or gz/bz2/xz compressed) or 7z (via the ``7z`` command-line
    tool). Progress is published in ``download_progress[session_id]``.

    Returns:
        True on success. On any failure the error is recorded in
        ``download_progress[session_id]`` and False is returned; no exception
        propagates to the caller.
    """
    try:
        download_progress[session_id] = {'status': 'extracting', 'progress': 30}
        lowered = file_path.lower()

        if lowered.endswith('.zip'):
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)

        elif lowered.endswith('.rar'):
            # NOTE(review): member paths of rar archives are not validated
            # here - confirm rarfile's own handling of hostile names.
            with rarfile.RarFile(file_path, 'r') as rar_ref:
                rar_ref.extractall(extract_to)

        elif lowered.endswith(('.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tar.xz')):
            # 'r:*' lets tarfile detect the compression transparently.
            with tarfile.open(file_path, 'r:*') as tar_ref:
                _extract_tar_safely(tar_ref, extract_to)

        elif lowered.endswith('.7z'):
            # 7z archives are handled by the external 7zip command-line tool.
            try:
                subprocess.run(
                    ['7z', 'x', file_path, f'-o{extract_to}'],
                    check=True,
                    capture_output=True,
                )
            except (subprocess.CalledProcessError, FileNotFoundError) as exc:
                raise Exception("7z文件需要安装7zip命令行工具") from exc

        else:
            raise Exception(f"不支持的压缩格式: {file_path}")

        download_progress[session_id] = {'status': 'completed', 'progress': 100}
        return True

    except Exception as e:
        download_progress[session_id] = {'status': 'error', 'message': f'解压失败: {str(e)}'}
        return False
88
+
89
def process_uploaded_file(file_path, session_id):
    """Unpack or copy an uploaded file into a fresh per-session directory.

    The destination is ``<UPLOAD_FOLDER>/<session_id>_uploaded``; an existing
    directory of that name is removed first. Archives are extracted with
    ``extract_archive``; any other file is copied in as-is. Progress is
    published in ``download_progress[session_id]``.

    Returns:
        The destination directory path, or None when processing failed (the
        error is recorded in ``download_progress[session_id]``).
    """
    archive_suffixes = ('.zip', '.rar', '.7z', '.tar', '.tar.gz', '.tgz', '.tar.bz2', '.tar.xz')

    try:
        download_progress[session_id] = {'status': 'processing', 'progress': 10}

        filename = os.path.basename(file_path)
        extract_dir = os.path.join(app.config['UPLOAD_FOLDER'], f"{session_id}_uploaded")

        # Always start from an empty destination directory.
        if os.path.exists(extract_dir):
            shutil.rmtree(extract_dir)
        os.makedirs(extract_dir)

        if filename.lower().endswith(archive_suffixes):
            # Archive: extract_archive reports its own progress and errors.
            if not extract_archive(file_path, extract_dir, session_id):
                return None
        else:
            # Single file: copy it in, dropping the "<session_id>_" prefix
            # that the upload route adds so the tree shows the real name.
            download_progress[session_id] = {'status': 'copying', 'progress': 50}
            prefix = f"{session_id}_"
            target_name = filename[len(prefix):] if filename.startswith(prefix) else filename
            shutil.copy2(file_path, os.path.join(extract_dir, target_name or filename))
            download_progress[session_id] = {'status': 'completed', 'progress': 100}

        return extract_dir

    except Exception as e:
        download_progress[session_id] = {'status': 'error', 'message': str(e)}
        return None
def extract_github_info(url):
    """Extract the owner and repository name from a GitHub URL.

    The first two non-empty path segments are taken as owner and repository.
    A trailing ``.git`` on the repository name is dropped, and a URL given
    without a scheme (``github.com/owner/repo``) is accepted.

    Returns:
        ``(username, repo_name)``, or ``(None, None)`` when the URL does not
        contain both parts.
    """
    if not url:
        return None, None

    # Without a scheme urlparse would treat the host name as a path segment.
    if '://' not in url:
        url = f"https://{url}"

    segments = [part for part in urlparse(url).path.split('/') if part]
    if len(segments) < 2:
        return None, None

    username, repo_name = segments[0], segments[1]
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not repo_name:
        return None, None

    return username, repo_name
124
+
125
def download_github_repo(url, session_id):
    """Download a GitHub repository as a zip and unpack it for this session.

    The ``main`` branch archive is tried first, then ``master``. The archive
    is unpacked into a private staging directory and its top-level folder is
    moved to ``<UPLOAD_FOLDER>/<session_id>_<repo_name>``. Progress is
    published in ``download_progress[session_id]``.

    Returns:
        The path of the unpacked repository, or None on failure (the error is
        recorded in ``download_progress[session_id]``).
    """
    zip_path = None
    staging_dir = None

    try:
        download_progress[session_id] = {'status': 'starting', 'progress': 0}

        username, repo_name = extract_github_info(url)
        if not username or not repo_name:
            download_progress[session_id] = {'status': 'error', 'message': '无效的GitHub URL'}
            return None

        upload_root = app.config['UPLOAD_FOLDER']

        # Final location of the repository; discard a stale copy if present.
        temp_dir = os.path.join(upload_root, f"{session_id}_{repo_name}")
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

        download_progress[session_id] = {'status': 'downloading', 'progress': 20}

        # Try the main branch first and fall back to master.
        response = None
        for branch in ('main', 'master'):
            zip_url = f"https://github.com/{username}/{repo_name}/archive/refs/heads/{branch}.zip"
            # The timeout keeps a stalled connection from hanging the worker.
            response = requests.get(zip_url, stream=True, timeout=30)
            if response.status_code == 200:
                break
            response.close()
        else:
            download_progress[session_id] = {'status': 'error', 'message': '无法下载仓库,请检查URL是否正确'}
            return None

        download_progress[session_id] = {'status': 'downloading', 'progress': 50}

        # Stream the zip to disk.
        zip_path = os.path.join(upload_root, f"{session_id}_{repo_name}.zip")
        try:
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        finally:
            response.close()

        download_progress[session_id] = {'status': 'extracting', 'progress': 70}

        # Unpack into a private staging directory so concurrent sessions (or
        # leftovers of earlier runs) can never be mistaken for this download.
        staging_dir = tempfile.mkdtemp(prefix='extract_', dir=upload_root)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(staging_dir)

        extracted_folders = sorted(
            name for name in os.listdir(staging_dir)
            if os.path.isdir(os.path.join(staging_dir, name))
        )
        if not extracted_folders:
            download_progress[session_id] = {'status': 'error', 'message': '下载的压缩包中没有找到仓库目录'}
            return None

        shutil.move(os.path.join(staging_dir, extracted_folders[0]), temp_dir)

        download_progress[session_id] = {'status': 'completed', 'progress': 100}
        return temp_dir

    except Exception as e:
        download_progress[session_id] = {'status': 'error', 'message': str(e)}
        return None

    finally:
        # Remove the zip and the staging directory on success and on failure.
        if zip_path and os.path.exists(zip_path):
            os.remove(zip_path)
        if staging_dir and os.path.isdir(staging_dir):
            shutil.rmtree(staging_dir, ignore_errors=True)
186
+
187
def get_file_tree(directory, ignore_dirs=None):
    """Return a flat, depth-annotated listing of the files under *directory*.

    Args:
        directory: Root directory to walk.
        ignore_dirs: Optional iterable of extra directory names to skip, in
            addition to the built-in ones (``node_modules``, ``.git``, ...).

    Returns:
        A list of dicts in walk order. Folders are
        ``{'type': 'folder', 'name', 'path', 'level'}`` and files are
        ``{'type': 'file', 'name', 'path', 'level', 'size'}``. The root itself
        is level 0 and is not listed; its direct children are level 1.
        Errors while walking are printed and the partial tree is returned.
    """
    # Directories are skipped by exact name, files by glob pattern. Matching
    # on names rather than on substrings of the full path keeps folders such
    # as "environment" (or a repository called "x.github.io") visible.
    ignored_dir_names = {'node_modules', '__pycache__', '.git', '.idea', 'venv', 'env'}
    if ignore_dirs:
        ignored_dir_names.update(ignore_dirs)
    ignored_file_patterns = ('.DS_Store', 'Thumbs.db', '*.pyc', '*.pyo', '*.pyd')

    def is_ignored_file(name):
        return any(fnmatch.fnmatch(name, pattern) for pattern in ignored_file_patterns)

    tree = []

    try:
        for root, dirs, files in os.walk(directory):
            # Prune in place so os.walk never descends into ignored folders;
            # sorting makes the listing deterministic.
            dirs[:] = sorted(d for d in dirs if d not in ignored_dir_names)

            relative_root = os.path.relpath(root, directory)
            level = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1

            if level > 0:
                tree.append({
                    'type': 'folder',
                    'name': os.path.basename(root),
                    'path': root,
                    'level': level
                })

            for file in sorted(files):
                if is_ignored_file(file):
                    continue
                file_path = os.path.join(root, file)
                tree.append({
                    'type': 'file',
                    'name': file,
                    'path': file_path,
                    'level': level + 1,
                    # A dangling symlink is listed but has no size to report.
                    'size': os.path.getsize(file_path) if os.path.exists(file_path) else 0
                })
    except Exception as e:
        print(f"Error generating file tree: {e}")

    return tree
232
+
233
def _is_inside_directory(base_dir, candidate):
    """Return True when *candidate* resolves to a path inside *base_dir*."""
    base = os.path.realpath(base_dir)
    target = os.path.realpath(candidate)
    try:
        return os.path.commonpath([base, target]) == base
    except ValueError:
        # Raised e.g. for paths on different drives: certainly not inside.
        return False


def _read_text_with_fallback(file_path):
    """Read *file_path* as UTF-8, then GBK; return a placeholder if neither decodes."""
    for encoding in ('utf-8', 'gbk'):
        try:
            with open(file_path, 'r', encoding=encoding) as infile:
                return infile.read()
        except UnicodeDecodeError:
            continue
    return "[二进制文件或编码错误,无法显示内容]\n"


def copy_selected_files_to_txt(source_dir, output_file, selected_files):
    """Concatenate the selected files into one UTF-8 text file.

    Each file is preceded by a banner carrying its path relative to
    *source_dir*. Entries that are not strings, do not name an existing
    regular file, or resolve outside *source_dir* are skipped silently; the
    last check stops a caller-supplied path from exposing unrelated files.

    Args:
        source_dir: Directory the selected files must live in.
        output_file: Path of the text file to write.
        selected_files: Iterable of file paths to include.

    Returns:
        True when the output file was written, False when writing failed
        (the error is printed).
    """
    separator = '=' * 50

    try:
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for file_path in selected_files:
                if not isinstance(file_path, str) or not os.path.isfile(file_path):
                    continue
                if not _is_inside_directory(source_dir, file_path):
                    continue

                relative_path = os.path.relpath(file_path, source_dir)

                outfile.write(f"{separator}\n")
                outfile.write(f"文件路径: {relative_path}\n")
                outfile.write(f"{separator}\n\n")

                try:
                    outfile.write(_read_text_with_fallback(file_path))
                except Exception as e:
                    # A single unreadable file must not abort the whole merge.
                    outfile.write(f"[读取文件时出错: {str(e)}]\n")

                outfile.write("\n\n")
        return True
    except Exception as e:
        print(f"Error copying files: {e}")
        return False
264
+
265
@app.route('/')
def index():
    """Render the application's main page from the ``index.html`` template."""
    return render_template('index.html')
268
+
269
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept an uploaded file and start processing it in the background.

    The file is stored as ``<UPLOAD_FOLDER>/<session_id>_<filename>`` and
    handed to ``process_uploaded_file`` on a worker thread.

    Returns:
        JSON ``{'session_id', 'filename'}`` on success; JSON ``{'error'}``
        with status 400 for a missing or unsupported file, or 500 when saving
        fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': '没有选择文件'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': '没有选择文件'}), 400

    if not allowed_file(file.filename):
        return jsonify({'error': '不支持的文件格式'}), 400

    # A random id cannot collide when two uploads arrive within one second.
    session_id = uuid.uuid4().hex

    try:
        filename = secure_filename(file.filename)
        if not allowed_file(filename):
            # secure_filename drops non-ASCII characters, which can swallow
            # the whole stem ("文件.zip" -> "zip"); rebuild a name that keeps
            # the already-validated extension so the type is still recognised.
            lowered = file.filename.lower()
            compound = next(
                (s for s in ('.tar.gz', '.tar.bz2', '.tar.xz') if lowered.endswith(s)),
                None,
            )
            suffix = compound or '.' + lowered.rsplit('.', 1)[1]
            filename = f"upload{suffix}"

        # Save the upload to disk.
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{session_id}_{filename}")
        file.save(file_path)

        # Process the file on a background thread; clients poll /progress.
        thread = threading.Thread(target=process_uploaded_file, args=(file_path, session_id))
        thread.start()

        return jsonify({'session_id': session_id, 'filename': filename})

    except Exception as e:
        return jsonify({'error': f'上传失败: {str(e)}'}), 500
297
# NOTE(review): the original commit has no route decorator on this view, so it
# was never reachable. '/download' with POST is presumed from the function
# name and its JSON body - confirm against the front-end template.
@app.route('/download', methods=['POST'])
def download_repo():
    """Start a background download of the GitHub repository named in the request.

    Expects a JSON body with ``url`` and, optionally, ``session_id``.

    Returns:
        JSON ``{'session_id'}`` once the download thread is started; JSON
        ``{'error'}`` with status 400 when the URL is missing or the supplied
        session id contains characters other than letters, digits, '-' or '_'.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    github_url = data.get('url')
    session_id = str(data.get('session_id') or int(time.time()))

    if not github_url:
        return jsonify({'error': '请提供GitHub URL'}), 400

    # The id becomes part of directory names that are created and removed,
    # so it must not be able to carry path separators or dots.
    if not all(ch.isalnum() or ch in '-_' for ch in session_id):
        return jsonify({'error': '无效的session_id'}), 400

    # Download on a background thread; clients poll /progress.
    thread = threading.Thread(target=download_github_repo, args=(github_url, session_id))
    thread.start()

    return jsonify({'session_id': session_id})
310
+
311
@app.route('/progress/<session_id>')
def get_progress(session_id):
    """Return the recorded progress of *session_id*, or ``{'status': 'unknown'}``."""
    return jsonify(download_progress.get(session_id, {'status': 'unknown'}))
314
+
315
@app.route('/files/<session_id>')
def get_files(session_id):
    """Return the file tree of the directory belonging to *session_id*.

    Session directories are named ``<session_id>_<suffix>``; the first match
    in the upload folder is used.

    Returns:
        JSON ``{'files', 'repo_path'}``, or JSON ``{'error'}`` with status 404
        when no directory exists for the session.
    """
    upload_root = app.config['UPLOAD_FOLDER']
    # Require the "_" separator so one id cannot match another session whose
    # id merely starts with the same characters.
    prefix = f"{session_id}_"

    for item in sorted(os.listdir(upload_root)):
        repo_path = os.path.join(upload_root, item)
        if item.startswith(prefix) and os.path.isdir(repo_path):
            file_tree = get_file_tree(repo_path)
            return jsonify({'files': file_tree, 'repo_path': repo_path})

    return jsonify({'error': '未找到仓库文件'}), 404
325
+
326
@app.route('/merge', methods=['POST'])
def merge_files():
    """Merge the selected files of a session into one downloadable text file.

    Expects a JSON body with ``session_id`` and a non-empty list
    ``selected_files``. The result is written to
    ``<OUTPUT_FOLDER>/merged_<session_id>.txt``.

    Returns:
        JSON ``{'download_url'}`` on success; JSON ``{'error'}`` with status
        400 (missing or malformed parameters), 404 (no directory for the
        session) or 500 (merge failed).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    session_id = data.get('session_id')
    selected_files = data.get('selected_files', [])

    if not session_id or not selected_files or not isinstance(selected_files, list):
        return jsonify({'error': '缺少必要参数'}), 400

    # Locate the session directory. The "_" separator is required so one id
    # cannot match another session whose id starts with the same characters.
    upload_root = app.config['UPLOAD_FOLDER']
    prefix = f"{session_id}_"
    repo_path = None
    for item in sorted(os.listdir(upload_root)):
        candidate = os.path.join(upload_root, item)
        if item.startswith(prefix) and os.path.isdir(candidate):
            repo_path = candidate
            break

    if not repo_path:
        return jsonify({'error': '未找到仓库'}), 404

    # Build the output file.
    output_filename = f"merged_{session_id}.txt"
    output_path = os.path.join(app.config['OUTPUT_FOLDER'], output_filename)

    if copy_selected_files_to_txt(repo_path, output_path, selected_files):
        return jsonify({'download_url': f'/download_result/{output_filename}'})

    return jsonify({'error': '合并文件时出错'}), 500
355
+
356
@app.route('/download_result/<filename>')
def download_result(filename):
    """Send a previously merged output file as an attachment.

    Returns:
        The file from the output folder, or a plain-text 404 response when
        *filename* does not name a regular file there.
    """
    # An absolute path makes sure the file that is checked is the file that
    # is sent, whatever base directory each call would use for relative paths.
    file_path = os.path.abspath(os.path.join(app.config['OUTPUT_FOLDER'], filename))

    # Only plain file names are served; directories (e.g. "..") are refused.
    if os.path.basename(filename) == filename and os.path.isfile(file_path):
        return send_file(file_path, as_attachment=True, download_name=filename)
    return "文件不存在", 404
362
+
363
if __name__ == '__main__':
    # The server listens on every interface, so the interactive debugger must
    # stay off unless it is requested explicitly through FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)