# Spaces: Gofor5 / Zhixue_Network_File_Download (status: Sleeping)
# File size: 16,624 Bytes

import re
import threading
import time
from pathlib import Path
import gradio as gr
import requests
from tqdm import tqdm
import tempfile

# Every request in this module is sent with verify=False, so silence the
# urllib3 warnings that would otherwise be printed for each call.
requests.packages.urllib3.disable_warnings()

# Subject display name -> subject code (the value sent as ``subjectCode``
# when the homework list is requested).
reference_subject = {
    "语文": "01",
    "历史": "12",
    "数学": "02",
    "生物": "13",
    "英语": "03",
    "通用技术": "102",
    "信息技术": "26",
    "物理": "05",
    "政治": "27",
    "化学": "06",
    "地理": "14",
}

# Reverse lookup: subject code -> display name. Derived from the table above
# so the two mappings cannot drift apart; iteration order is preserved.
subject_codes = {code: name for name, code in reference_subject.items()}
# Request-header templates, selected by list position:
#   headers[0] - passed to requests by get_token(); query_homework() adds the
#                session Cookie to it.
#   headers[1] - passed to requests by get(), which overwrites "Host" and adds
#                "sucUserToken" in place.
#   headers[2] - passed to requests by post(), which overwrites "Host"/"Origin"
#                and adds "sucUserToken"/"Authorization" in place.
# The dicts are shared module state: the helpers mutate them on every call.
headers = [
    # [0] Desktop-Chrome-style headers for www.zhixue.com.
    {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Host": "www.zhixue.com",
        "Referer": "https://www.zhixue.com/middlehomework/web-student/views/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
        "appName": "com.iflytek.zxzy.web.zx.stu",
        "sec-ch-ua": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"'
    },
    # [1] Headers with the "zhixue_student" iPhone app User-Agent, used for GET calls.
    {
        "Host": "www.zhixue.com",
        "sucOriginAppKey": "zhixue_student",
        "User-Agent": "zhixue_student/1.0.2026 (iPhone; iOS 16.2; Scale/3.00)",
        "appName": "com.zhixue.student",
        "Connection": "keep-alive",
        "Accept-Language": "zh-Hans-CN;q=1, zh-Hant-CN;q=0.9, en-CN;q=0.8",
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br"
    },
    # [2] JSON headers with an iPhone browser User-Agent, used for POST calls.
    {
        "Host": "mhw.zhixue.com",
        "Content-Type": "application/json",
        "Accept": "application/json, text/plain, */*",
        "appName": "com.zhixue.student",
        "sucOriginAppKey": "zhixue_student",
        "Accept-Language": "zh-CN,zh-Hans;q=0.9",
        "Origin": "https://mhw.zhixue.com",
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko)",
        "Referer": "https://mhw.zhixue.com/zhixuestudent/views/homeworkReport/homework-report.html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive"
    }
]

def get_token():
    """Fetch a token from ``/middleweb/newToken`` and store it in the global ``token``.

    The request is sent with ``headers[0]``; the caller is expected to have
    placed the session cookie there beforehand.

    Returns:
        True when a non-empty token was received and stored, False otherwise.
        A reply that is not JSON, or that lacks ``result.token`` (for example
        an error payload for an expired session), is reported as a failure
        instead of raising, so the caller's ``False`` handling is reachable.
    """
    global token
    response = requests.get("https://www.zhixue.com/middleweb/newToken", headers=headers[0], verify=False)
    response.encoding = "utf-8"
    try:
        payload = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        payload = None

    result = payload.get("result") if isinstance(payload, dict) else None
    new_token = result.get("token") if isinstance(result, dict) else None
    if new_token:
        token = new_token
        return True

    print("获取 token 失败。")
    return False
    
def format_time(timestamp):
    """Render a millisecond timestamp as local time in ``YYYY-MM-DD HH:MM`` form."""
    whole_seconds = timestamp // 1000  # drop the millisecond part
    local_struct = time.localtime(whole_seconds)
    return time.strftime("%Y-%m-%d %H:%M", local_struct)
   
def get(url):
    """Send a GET request to ``url`` and return the decoded JSON body.

    The shared ``headers[1]`` dict is updated in place: ``Host`` is set to the
    host part of ``url`` and ``sucUserToken`` to the current global ``token``.
    """
    host = url.split("/")[2]  # "https://host/path" -> "host"
    current_token = token
    request_headers = headers[1]
    request_headers.update(Host=host, sucUserToken=current_token)
    reply = requests.get(url, headers=request_headers, verify=False)
    reply.encoding = "utf-8"
    return reply.json()

def post(url, data):
    """POST ``data`` as JSON to ``url`` and return the decoded JSON body.

    The shared ``headers[2]`` dict is updated in place: ``Host`` and ``Origin``
    are derived from ``url``; ``sucUserToken`` and ``Authorization`` both carry
    the current global ``token``.
    """
    host = url.split("/")[2]  # "https://host/path" -> "host"
    current_token = token
    request_headers = headers[2]
    request_headers["Host"] = host
    request_headers["Origin"] = f"https://{host}"
    request_headers["sucUserToken"] = current_token
    request_headers["Authorization"] = current_token
    reply = requests.post(url, headers=request_headers, json=data, verify=False)
    reply.encoding = "utf-8"
    return reply.json()

def parse_range(s, max_value):
    """Parse a whitespace-separated selection string into zero-based indices.

    Each token is either a 1-based number (``"3"``) or a 1-based inclusive
    range (``"2-5"``; ``"5-2"`` yields the same items in descending order).
    Malformed tokens are ignored.

    Args:
        s: The selection text, e.g. ``"1-3 7 10-8"``.
        max_value: Number of selectable items; only indices in
            ``[0, max_value)`` are returned.

    Returns:
        The selected zero-based indices in first-seen order, without duplicates.
    """
    selected = []
    seen = set()  # O(1) duplicate check; ``selected`` keeps the order

    def add(index):
        if index not in seen:
            seen.add(index)
            selected.append(index)

    last_valid = max_value - 1
    for item in s.split():
        first, dash, last = item.partition("-")
        if dash:
            # isdecimal() only accepts characters int() can parse, unlike
            # isdigit(), which also accepts e.g. superscript digits.
            if not (first.isdecimal() and last.isdecimal()):
                continue
            begin = int(first) - 1
            end = int(last) - 1
            # Clamp to the valid window before iterating so a huge typed range
            # costs at most max_value steps.
            if begin <= end:
                indices = range(max(begin, 0), min(end, last_valid) + 1)
            else:
                indices = range(min(begin, last_valid), max(end, 0) - 1, -1)
            for index in indices:
                add(index)
        elif item.isdecimal():
            index = int(item) - 1
            if 0 <= index < max_value:
                add(index)
    return selected

def to_file(file, source_type, name=""):
    """Normalise one attachment into a ``{name, path, is_text, type}`` record.

    Args:
        file: Either a string or an attachment dict.
            * str with ``name`` given: the string is kept as ``path`` and the
              record is flagged as text.
            * str without ``name``: the record is named after the last path
              component of the string and is not flagged as text.
            * dict with ``fileType == 5``: ``description`` is kept as ``path``
              and the record is flagged as text.
            * any other dict: ``path`` is taken from the dict and the name
              falls back to the last path component when ``name`` is empty.
        source_type: Label stored under ``"type"``.
        name: Optional explicit file name.

    Returns:
        The record, with characters ``\\ / : * ? " < > |`` in the name
        replaced by underscores.
    """
    if isinstance(file, str):
        entry = {
            "name": name or Path(file).name,
            "path": file,
            "is_text": bool(name),
        }
    elif file["fileType"] == 5:
        entry = {"name": name, "path": file["description"], "is_text": True}
    else:
        entry = {
            "name": file.get("name", "") or Path(file["path"]).name,
            "path": file["path"],
            "is_text": False,
        }

    safe_name = re.sub('[\\\\/:*?"<>|]', "_", entry["name"])
    entry.update({"name": safe_name, "type": source_type})
    return entry

def analyze_homework(homework, include_text, uid):
    """Collect the downloadable file records belonging to one homework entry.

    Which endpoints are queried depends on ``homework["hwType"]``:
    102 reads the student report (description, images referenced through
    ``bigger="..."`` attributes in the question/answer/analysis HTML, and
    submitted answers); 105 reads the attachment list plus the student report;
    107 reads the clock-homework detail. Any other type yields no files.

    A missing or null ``result`` in a reply, null attachment lists or
    descriptions, and an empty ``hwClockRecordPreviewResponses`` list are all
    treated as "nothing to collect" rather than raising.

    Args:
        homework: One raw homework dict; ``hwId``, ``hwType`` and ``stuHwId``
            are read from it.
        include_text: When false, records flagged ``is_text`` are dropped.
        uid: Student id sent as ``studentId``.

    Returns:
        A list of records produced by ``to_file`` whose ``path`` is non-empty.
    """
    hw_type = homework["hwType"]
    data = {
        "base": {"appId": "APP"},
        "params": {"hwId": homework["hwId"], "stuHwId": homework["stuHwId"], "studentId": uid},
    }
    report_url = "https://mhw.zhixue.com/hwreport/question/getStuReportDetail"
    file_list = []

    def fetch_result(url):
        # An absent or null "result" simply means there is nothing to read.
        return post(url, data).get("result")

    def description_file(text, title):
        # An empty description produces an empty path, which the final
        # filter below removes again.
        return to_file(text or "", "题目", (title or "") + "_说明.txt")

    def submitted_files(problem):
        return [to_file(path, "提交")
                for item in problem.get("subTopics") or []
                for path in item.get("answerResList") or []]

    if hw_type == 102:
        result = fetch_result(report_url)
        if result:
            file_list.append(description_file(result.get("hwDescription"), result.get("hwTitle")))
            for problem in result.get("mainTopics") or []:
                content = "".join(problem.get(key) or ""
                                  for key in ("content", "answerHtml", "analysisHtml"))
                file_list += [to_file(path, "题目") for path in re.findall('bigger="(.+?)"', content)]
                file_list += submitted_files(problem)

    elif hw_type == 105:
        attachments = fetch_result("https://mhw.zhixue.com/hw/homework/attachment/list") or []
        file_list += [to_file(item, "题目") for item in attachments]
        result = fetch_result(report_url)
        if result:
            file_list.append(description_file(result.get("hwDescription"), result.get("hwTitle")))
            file_list += [to_file(item, "答案") for item in result.get("answerAttachList") or []]
            for problem in result.get("mainTopics") or []:
                file_list += submitted_files(problem)

    elif hw_type == 107:
        result = fetch_result("https://mhw.zhixue.com/hw/clock/answer/getClockHomeworkDetail")
        if result:
            title = result.get("title") or ""
            file_list.append(description_file(result.get("description"), title))
            # Only the first preview record is read; tolerate it being absent.
            records = result.get("hwClockRecordPreviewResponses") or []
            record = records[0] if records else {}
            file_list += [to_file(item, "题目") for item in result.get("hwTopicAttachments") or []]
            file_list += [to_file(item, "答案") for item in result.get("hwAnswerAttachments") or []]
            file_list += [to_file(item, "答案") for item in record.get("teacherAnswerAttachments") or []]
            file_list += [to_file(item, "提交", title + "_提交.txt")
                          for item in record.get("answerAttachments") or []]

    return [entry for entry in file_list
            if entry["path"] and (include_text or not entry["is_text"])]

def query_homework(uid, tlsysSessionId, subject, status, max_count):
    """Fetch one page of homework per (subject, status) pair and build the choice list.

    Side effects: stores the session cookie in ``headers[0]``, refreshes the
    global token via ``get_token()``, and replaces the globals
    ``homework_list`` (display labels) and ``homework_list_oringin`` (raw
    homework dicts, same order). The globals are only replaced after every
    request succeeded, so a failure leaves the previous pair intact.

    Args:
        uid: Not used here; kept because the click handler passes it.
        tlsysSessionId: Session id placed in the ``Cookie`` header.
        subject: Selected subject display names. When every known subject is
            selected (in any order) a single query with code ``"-1"`` is sent.
        status: ``"全部"``, ``"已完成"`` or anything else (treated as unfinished).
        max_count: Page size for each request.

    Returns:
        ``(token, update)`` for the token textbox and the homework checkbox
        group. With no subject selected no request is made and the choice
        list is emptied. On token failure a message and ``""`` are returned.

    Raises:
        RuntimeError: If a homework-list reply has a ``code`` other than 200.
    """
    global homework_list, homework_list_oringin

    headers[0].update({"Cookie": f"tlsysSessionId={tlsysSessionId}"})
    if not get_token():
        return  "获取 token 失败",""

    page_size = int(max_count)
    chosen = subject or []
    # Compare as sets: the order of a checkbox-group value should not decide
    # whether the single "all subjects" query is used.
    if set(chosen) == set(reference_subject):
        subjects = ["-1"]
    else:
        subjects = [reference_subject[name] for name in chosen]

    if status == "全部":
        wanted_statuses = (0, 1)
    elif status == "已完成":
        wanted_statuses = (1,)
    else:
        wanted_statuses = (0,)
    # Unfinished entries first, then finished ones, each over all subjects.
    fetch_list = [{"subject": code, "status": state}
                  for state in wanted_statuses for code in subjects]

    fetched = []
    created_before = int(time.time() * 1000)
    print("\x9B1F\x9B0J", end="")
    # NOTE(review): pageIndex is fixed at 2, as in the original request string;
    # confirm against the API whether that is intended.
    for entry in tqdm(fetch_list, unit=""):
        response = get("https://mhw.zhixue.com/homework_middle_service/stuapp/getStudentHomeWorkList"
                       f'?completeStatus={entry["status"]}&createTime={created_before}&pageIndex=2'
                       f'&pageSize={page_size}&subjectCode={entry["subject"]}&token={token}')
        if response["code"] != 200:
            raise RuntimeError("获取作业列表失败")
        fetched += response["result"]["list"]
    print("\x9B1F\x9B0J", end="")

    labels = [
        f"[{hw['subjectName']}]|{hw['hwTitle']}|{format_time(hw['beginTime'])}-{format_time(hw['endTime'])}"
        for hw in fetched
    ]
    homework_list_oringin = fetched
    homework_list = labels
    return token, gr.update(choices=homework_list, value=[])

def parse_homework(token, include_text, homework_selection, uid):
    """Resolve the selected homework labels and list the files they contain.

    Side effects: replaces the globals ``file_list_output`` (file names),
    ``homework_downloaded_path`` (paths or text content) and
    ``homework_is_text`` (flags); the three lists are index-aligned. They are
    assigned after the loop, so an empty selection resets them to empty lists
    instead of leaving them undefined or holding the previous run's files.

    Args:
        token: Not used here; kept because the click handler passes it.
        include_text: Forwarded to ``analyze_homework``.
        homework_selection: Labels chosen in the homework checkbox group.
        uid: Student id forwarded to ``analyze_homework``.

    Returns:
        A Gradio update that sets the file checkbox group's choices to the
        collected file names and clears its selection.
    """
    global file_list_output, homework_downloaded_path, homework_is_text

    # Map each selected label back to its position(s) in the label list; the
    # raw homework dicts live at the same positions in homework_list_oringin.
    selected_homework = [
        position
        for label in homework_selection or []
        for position, candidate in enumerate(homework_list)
        if candidate == label
    ]

    file_list = []
    for position in tqdm(selected_homework, unit=""):
        file_list += analyze_homework(homework_list_oringin[position], include_text, uid)

    file_list_output = [file["name"] for file in file_list]
    homework_downloaded_path = [file["path"] for file in file_list]
    homework_is_text = [file["is_text"] for file in file_list]
    return gr.update(choices=file_list_output, value=[])

def download_file(homework_view):
    """Turn the selected file names into a list of paths for the file output.

    Names are looked up in the global ``file_list_output``; the index-aligned
    globals ``homework_downloaded_path`` and ``homework_is_text`` supply the
    matching path and text flag. Non-text entries contribute their stored path
    unchanged. Text entries are written to a new ``.txt`` temporary file,
    always encoded as UTF-8 so the content does not depend on the platform's
    default encoding.

    Args:
        homework_view: Names chosen in the file checkbox group (may be empty).

    Returns:
        One path per matching entry, in selection order.
    """
    download_list = []
    for selected_name in homework_view or []:
        entries = zip(file_list_output, homework_downloaded_path, homework_is_text)
        for name, path, is_text in entries:
            if name != selected_name:
                continue
            if not is_text:
                download_list.append(path)
                continue
            # For text entries "path" holds the text itself.
            with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt",
                                             encoding="utf-8") as f:
                f.write(path)
            download_list.append(f.name)
    return download_list

def save_config(uid, tlsysSessionId):
    """Write ``uid`` and ``tlsysSessionId`` to a new ``.txt`` temp file and return its path.

    The file holds two ``key=value`` lines and is not deleted automatically.
    """
    lines = (f"uid={uid}", f"tlsysSessionId={tlsysSessionId}")
    handle = tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt")
    with handle:
        handle.write("\n".join(lines))
    return handle.name

def load_config(file):
    """Read ``uid`` and ``tlsysSessionId`` back from an uploaded config file.

    Args:
        file: The uploaded file. Either a path (``str`` or ``Path``), which is
            what a file component configured with ``type="filepath"`` passes,
            or an object exposing the path as ``.name``. Falsy means no upload.

    Returns:
        ``(uid, tlsysSessionId)``; each element is ``None`` when the
        corresponding ``key=value`` entry is not present in the file.
    """
    if not file:
        return None, None

    # A plain path has no usable ``.name``: on a str it is missing, and on a
    # Path it is only the base name.
    path = file if isinstance(file, (str, Path)) else file.name
    # Only the two key=value entries matter, so undecodable bytes elsewhere
    # are replaced rather than allowed to abort the read.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        content = f.read()

    uid = re.search(r"uid=(\S+)", content)
    tlsysSessionId = re.search(r"tlsysSessionId=(\S+)", content)
    return (uid.group(1) if uid else None,
            tlsysSessionId.group(1) if tlsysSessionId else None)

# Build the Gradio UI. Components are created at module scope, so every name
# assigned inside this ``with`` block is a module-level global.
with gr.Blocks(title="智学网作业获取器") as demo:
    gr.Markdown("# 🚀 智学网作业获取器")
    gr.Markdown("## Backfront Created by Levrium,UI Design by Start_ten")
    gr.Markdown("操作说明请见https://zhuanlan.zhihu.com/p/691808543")
    
    # Config-file area: upload on the left, load/export buttons in the middle,
    # exported file on the right.
    with gr.Row():
        with gr.Column(scale=2):
            config_upload = gr.File(label="上传配置文件", type="filepath", file_types=[".txt"])
        with gr.Column(scale=1):
            load_config_btn = gr.Button("识别配置文件", variant="secondary")
            save_config_btn = gr.Button("导出配置文件", variant="secondary")
        with gr.Column(scale=2):
            config_download = gr.File(label="下载配置文件", interactive=False)
    
    with gr.Row():
        # Left column: credentials and query filters.
        with gr.Column():
            gr.Markdown("### 📝 查询作业")
            uid = gr.Textbox(
                label="uid",
                placeholder="请输入uid...",
            )
            tlsysSessionId = gr.Textbox(
                label="tlsysSessionId",
                placeholder="请输入tlsysSessionId...", 
            )
            with gr.Row():
                with gr.Column(scale=2):
                    # All subjects are selected by default.
                    subject = gr.CheckboxGroup(
                    choices=["语文", "数学", "英语", "物理", "化学", "生物", "地理", "历史", "政治",'通用技术', '信息技术'],
                    label="具体学科",
                    value=["语文", "数学", "英语", "物理", "化学", "生物", "地理", "历史", "政治",'通用技术', '信息技术']
                )
                with gr.Column(scale=1):
                    # "Select all": sets the subject group to the full list.
                    all_chosen = gr.Button(
                        value="全选",
                        variant="secondary"
                    )
                    all_chosen.click(
                        fn=lambda: ["语文", "数学", "英语", "物理", "化学", "生物", "地理", "历史", "政治",'通用技术', '信息技术'],
                        inputs=[],
                        outputs=[subject]
                    )
                    # "Select none": sets the subject group to an empty list.
                    all_clear = gr.Button(
                        value="全不选",
                        variant="secondary"
                    )
                    all_clear.click(
                        fn=lambda: [],
                        inputs=[],
                        outputs=[subject]
                    )

            status = gr.Radio(
                choices=["全部", "已完成", "未完成"],
                label="作业状态",
                value="全部"
            )
            # Passed to query_homework as max_count.
            max_count = gr.Slider(
                label="最大请求作业数",
                value=10,
                minimum=1,
                maximum=50,
                step=1
            )
        # Right column: token display and the homework list filled by the query.
        with gr.Column():
            # NOTE: this binds the module-level name ``token`` to a Textbox.
            # get_token() later rebinds the same global to the token string
            # that get()/post() read; the component reference captured in the
            # click handlers below is unaffected by that rebinding.
            token = gr.Textbox(
                label="TOKEN",
                interactive=False
            )

            homework_selection = gr.CheckboxGroup(
                label="作业列表（可多选）",
                choices=[],
                interactive=True
            )
            submit_btn = gr.Button("查询作业", variant="primary")
            submit_btn.click(
                fn=query_homework,
                inputs=[uid, tlsysSessionId, subject, status, max_count],
                outputs=[token, homework_selection ]
            )
    gr.Markdown("---")        
    with gr.Row():
        # Left column: parse the selected homework into a list of files.
        with gr.Column():
            gr.Markdown("### 📄 解析作业并下载")
            text_parse = gr.Checkbox(
                label="是否解析题目、提交的文本？",
                value=False
            )
            homework_view = gr.CheckboxGroup(
                label="作业内容",
                choices=[],
                interactive=True
            )
            # ``submit_btn`` is reused for a second button; the query button
            # above already has its handler attached.
            submit_btn = gr.Button("解析作业", variant="primary")
            submit_btn.click(
                fn=parse_homework,
                inputs=[token, text_parse, homework_selection, uid],
                outputs=[homework_view]
            )
        # Right column: hand the selected files to the file output.
        with gr.Column():
            file_output = gr.File(label="作业文件", interactive=False)
            download_btn = gr.Button("下载作业", variant="primary")
            download_btn.click(
                fn=download_file,
                inputs=[homework_view],
                outputs=[file_output]
            )
    
    # Wire up the config-file buttons.
    load_config_btn.click(
        fn=load_config,
        inputs=[config_upload],
        outputs=[uid, tlsysSessionId]
    )
    
    save_config_btn.click(
        fn=save_config,
        inputs=[uid, tlsysSessionId],
        outputs=[config_download]
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()