Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Commit 
							
							·
						
						c006dbc
	
1
								Parent(s):
							
							1b19224
								
feat: add network filter
Browse files
- competitions/runner.py +9 -1
- competitions/utils.py +162 -2
- other_files/network_filter.so +0 -0
    	
        competitions/runner.py
    CHANGED
    
    | @@ -15,7 +15,7 @@ from loguru import logger | |
| 15 |  | 
| 16 | 
             
            from competitions.enums import SubmissionStatus, ErrorMessage
         | 
| 17 | 
             
            from competitions.info import CompetitionInfo
         | 
| 18 | 
            -
            from competitions.utils import user_token_api, space_cleaner
         | 
| 19 |  | 
| 20 |  | 
| 21 | 
             
            @dataclass
         | 
| @@ -201,6 +201,14 @@ class JobRunner: | |
| 201 | 
             
                    )
         | 
| 202 | 
             
                    with open(f"{client_code_local_dir}/README.md", "w", encoding="utf-8") as f:
         | 
| 203 | 
             
                        f.write(self._create_readme(space_id))
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 204 | 
             
                    try:
         | 
| 205 | 
             
                        api.upload_folder(
         | 
| 206 | 
             
                            repo_id=space_id,
         | 
|  | |
| 15 |  | 
| 16 | 
             
            from competitions.enums import SubmissionStatus, ErrorMessage
         | 
| 17 | 
             
            from competitions.info import CompetitionInfo
         | 
| 18 | 
            +
            from competitions.utils import user_token_api, space_cleaner, dockerfile_modifier
         | 
| 19 |  | 
| 20 |  | 
| 21 | 
             
            @dataclass
         | 
|  | |
| 201 | 
             
                    )
         | 
| 202 | 
             
                    with open(f"{client_code_local_dir}/README.md", "w", encoding="utf-8") as f:
         | 
| 203 | 
             
                        f.write(self._create_readme(space_id))
         | 
| 204 | 
            +
                    shutil.copyfile("./other_files/network_filter.so", os.path.join(client_code_local_dir, "network_filter.so"))
         | 
| 205 | 
            +
                    for filename in os.listdir(client_code_local_dir):
         | 
| 206 | 
            +
                        if filename.lower() == "dockerfile":
         | 
| 207 | 
            +
                            filepath = os.path.join(client_code_local_dir, filename)
         | 
| 208 | 
            +
                            with open(filepath, "r", encoding="utf-8") as f:
         | 
| 209 | 
            +
                                dockerfile_content = f.read()
         | 
| 210 | 
            +
                            with open(filepath, "w", encoding="utf-8") as f:
         | 
| 211 | 
            +
                                f.write(dockerfile_modifier.modify_dockerfile_content(dockerfile_content)[0])
         | 
| 212 | 
             
                    try:
         | 
| 213 | 
             
                        api.upload_folder(
         | 
| 214 | 
             
                            repo_id=space_id,
         | 
    	
        competitions/utils.py
    CHANGED
    
    | @@ -3,12 +3,12 @@ import json | |
| 3 | 
             
            import os
         | 
| 4 | 
             
            import shlex
         | 
| 5 | 
             
            import subprocess
         | 
| 6 | 
            -
            import  | 
| 7 | 
             
            import threading
         | 
| 8 | 
             
            import uuid
         | 
| 9 | 
             
            import base64
         | 
| 10 | 
             
            import glob
         | 
| 11 | 
            -
            from typing import Optional, Dict, Any, List, Literal
         | 
| 12 | 
             
            from collections import defaultdict
         | 
| 13 | 
             
            from datetime import datetime, timezone, timedelta
         | 
| 14 |  | 
| @@ -755,3 +755,163 @@ leaderboard_api = LeaderboardApi( | |
| 755 | 
             
                hf_token=os.environ.get("HF_TOKEN", None),
         | 
| 756 | 
             
                competition_id=os.environ.get("COMPETITION_ID")
         | 
| 757 | 
             
            )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 3 | 
             
            import os
         | 
| 4 | 
             
            import shlex
         | 
| 5 | 
             
            import subprocess
         | 
| 6 | 
            +
            import re
         | 
| 7 | 
             
            import threading
         | 
| 8 | 
             
            import uuid
         | 
| 9 | 
             
            import base64
         | 
| 10 | 
             
            import glob
         | 
| 11 | 
            +
            from typing import Optional, Dict, Any, List, Literal, Tuple
         | 
| 12 | 
             
            from collections import defaultdict
         | 
| 13 | 
             
            from datetime import datetime, timezone, timedelta
         | 
| 14 |  | 
|  | |
| 755 | 
             
                hf_token=os.environ.get("HF_TOKEN", None),
         | 
| 756 | 
             
                competition_id=os.environ.get("COMPETITION_ID")
         | 
| 757 | 
             
            )
         | 
| 758 | 
            +
             | 
| 759 | 
            +
             | 
| 760 | 
            +
             | 
| 761 | 
            +
            class DockerfileModifier:
                """Rewrite a Dockerfile so its start command runs with the network filter preloaded.

                Every ``CMD`` / ``ENTRYPOINT`` instruction is prefixed with
                ``LD_PRELOAD=<filter .so> ALLOWED_HOSTS="<hosts>"``, and instructions that
                copy the shared object into the image are appended to the Dockerfile.

                Attributes:
                    allowed_hosts: Value placed verbatim in the ``ALLOWED_HOSTS`` variable.
                    source_so_path: Build-context path of ``network_filter.so`` (COPY source).
                    tatget_so_dir: In-image directory receiving the library. The attribute
                        name is misspelled ("tatget") and kept as-is for compatibility.
                    tatget_so_path: In-image path of the library (COPY destination).
                    preload_prefix: Environment assignments prepended to each command.
                """

                # Executables recognised as an explicit ``<shell> -c <script>`` exec form.
                _SHELLS = ('/bin/sh', '/bin/bash', 'sh', 'bash')

                def __init__(self, allowed_hosts: str, source_so_path: str = "./network_filter.so"):
                    """Store the host allow-list and derive the in-image paths and env prefix."""
                    self.allowed_hosts = allowed_hosts
                    self.source_so_path = source_so_path
                    self.tatget_so_dir = "/_app_extensions"
                    # The path lives inside a Linux image, so always join with '/'
                    # instead of the host-dependent os.path.join.
                    self.tatget_so_path = f"{self.tatget_so_dir}/network_filter.so"
                    self.preload_prefix = f'LD_PRELOAD={self.tatget_so_path} ALLOWED_HOSTS="{allowed_hosts}"'

                def parse_dockerfile_line(self, line: str) -> Tuple[str, str, str]:
                    """Classify one Dockerfile line.

                    Returns:
                        ``(instruction, command, form)`` where ``instruction`` is ``"CMD"``
                        or ``"ENTRYPOINT"`` (upper-cased), ``command`` is the text after
                        it, and ``form`` is ``"exec"`` (bracketed JSON array) or
                        ``"shell"``. Any other line yields ``("", "", "")``.
                    """
                    line = line.strip()

                    # Only CMD and ENTRYPOINT are of interest.
                    cmd_match = re.match(r'^(CMD|ENTRYPOINT)\s+(.+)$', line, re.IGNORECASE)
                    if not cmd_match:
                        return "", "", ""

                    instruction = cmd_match.group(1).upper()
                    command_part = cmd_match.group(2).strip()

                    # A bracketed argument is exec form (JSON array); anything else is shell form.
                    if command_part.startswith('[') and command_part.endswith(']'):
                        return instruction, command_part, "exec"
                    return instruction, command_part, "shell"

                def modify_shell_format(self, command: str) -> str:
                    """Return the shell-form ``command`` with the env prefix prepended."""
                    return f'{self.preload_prefix} {command}'

                def modify_exec_format(self, command: str) -> str:
                    """Return the exec-form (JSON array) ``command`` with the env prefix applied.

                    * ``["sh", "-c", "script", ...]`` keeps its shape; the prefix is
                      prepended to the script string.
                    * Any other array is wrapped as ``["/bin/sh", "-c", "<prefix> <cmd>"]``
                      with every original argument shell-quoted, so arguments containing
                      spaces or metacharacters reach the program unchanged.
                    * An empty array is returned untouched.
                    * Text that is not a JSON array of strings is handled like shell form.
                    """
                    import json  # local import keeps this block self-contained

                    try:
                        parts = json.loads(command)
                        if not isinstance(parts, list) or not all(isinstance(p, str) for p in parts):
                            raise ValueError("exec form must be a JSON array of strings")
                    except ValueError as e:
                        print(f"警告: 解析 exec 格式失败: {e}")
                        print(f"原始命令: {command}")
                        # Fall back to treating the text as a shell-form command.
                        return self.modify_shell_format(command)

                    if not parts:
                        return command

                    if len(parts) >= 3 and parts[0] in self._SHELLS and parts[1] == '-c':
                        # Already "<shell> -c <script>": prefix the script, keep extra args.
                        new_parts = [parts[0], parts[1], f'{self.preload_prefix} {parts[2]}'] + parts[3:]
                    else:
                        # Direct execution: wrap in a shell so the env assignments apply.
                        # shlex.join quotes each argument, preserving the original argv.
                        new_parts = ['/bin/sh', '-c', f'{self.preload_prefix} {shlex.join(parts)}']

                    return json.dumps(new_parts, ensure_ascii=False)

                def modify_dockerfile_content(self, content: str) -> Tuple[str, List[str]]:
                    """Rewrite Dockerfile text and report what was changed.

                    Every ``CMD`` / ``ENTRYPOINT`` line is prefixed, then the instructions
                    copying the filter library into the image are appended at the end of
                    the file. Appending never splits a backslash-continued instruction
                    and always lands after the final ``FROM``.

                    If the final build stage sets ``USER``, the library is copied with
                    that ownership and the target directory is chowned to it.

                    Returns:
                        ``(new_content, changes)`` where ``changes`` holds three log
                        lines (location, original, rewritten) per modified instruction.
                    """
                    modified_lines = []
                    changes = []

                    for i, line in enumerate(content.splitlines(), 1):
                        instruction, command, format_type = self.parse_dockerfile_line(line)
                        if not (instruction and command):
                            modified_lines.append(line)
                            continue

                        if format_type == "exec":
                            new_command = self.modify_exec_format(command)
                        else:
                            new_command = self.modify_shell_format(command)
                        new_line = f'{instruction} {new_command}'

                        changes.append(f"第 {i} 行: {instruction} 指令已修改")
                        changes.append(f"  原始: {line}")
                        changes.append(f"  修改: {new_line}")
                        modified_lines.append(new_line)

                    # Find the USER in effect at the end of the final stage: scan from the
                    # bottom and stop at the first USER, or at FROM (start of that stage).
                    last_user = None
                    for line in reversed(modified_lines):
                        user_match = re.match(r'^\s*USER\s+(\S+)', line, re.IGNORECASE)
                        if user_match:
                            last_user = user_match.group(1)
                            break
                        if re.match(r'^\s*FROM\s', line, re.IGNORECASE):
                            break

                    if last_user is None:
                        modified_lines.append(f"COPY {self.source_so_path} {self.tatget_so_path}")
                    else:
                        # USER may already be "user:group"; only add a group when missing.
                        owner = last_user if ':' in last_user else f"{last_user}:{last_user}"
                        modified_lines.append(f"COPY --chown={owner} {self.source_so_path} {self.tatget_so_path}")
                        modified_lines.append(f"RUN chown -R {owner} {self.tatget_so_dir}")

                    return '\n'.join(modified_lines), changes
         | 
| 915 | 
            +
             | 
| 916 | 
            +
             | 
| 917 | 
            +
            # Module-level DockerfileModifier instance. The argument is the allowed-hosts
            # string that the class embeds verbatim in the ALLOWED_HOSTS variable it
            # prepends to CMD/ENTRYPOINT commands; here it lists two comma-separated hosts.
            dockerfile_modifier = DockerfileModifier("127.0.0.1,xdimlab-hugsim-web-server-0.hf.space")
         | 
    	
        other_files/network_filter.so
    ADDED
    
    | Binary file (16.8 kB). View file | 
|  | 
