File size: 5,173 Bytes
ffa3318
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import os
import base64
import requests
import markdown
import pdfkit
import gradio as gr
import re
import random
from datetime import datetime
from jinja2 import Template
import openai

# -- Credentials from environment variables -- #
# Each key falls back to an empty string when the variable is not set.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Hand the key to the openai module so later calls pick it up.
openai.api_key = OPENAI_API_KEY

# —— Baidu OCR —— #
def get_access_token(api_key, secret_key, timeout=10):
    """Request an OAuth access token from the Baidu token endpoint.

    Args:
        api_key: Sent as the ``client_id`` query parameter.
        secret_key: Sent as the ``client_secret`` query parameter.
        timeout: Seconds to wait on the HTTP request. Without a timeout
            ``requests`` can block forever on a stalled connection.

    Returns:
        The ``access_token`` field of the JSON response, or ``None`` if the
        response does not contain that field.
    """
    resp = requests.post(
        "https://aip.baidubce.com/oauth/2.0/token",
        params={
            "grant_type": "client_credentials",
            "client_id": api_key,
            "client_secret": secret_key,
        },
        timeout=timeout,
    )
    return resp.json().get("access_token")

def ocr_image(image_bytes, token, timeout=30):
    """Run the Baidu handwriting OCR endpoint on an image.

    Args:
        image_bytes: Raw image content; it is base64-encoded before upload.
        token: Access token sent as the ``access_token`` query parameter.
        timeout: Seconds to wait on the HTTP request. Without a timeout
            ``requests`` can block forever on a stalled connection.

    Returns:
        The ``words_result`` list from the JSON response, or an empty list
        if the response does not contain that field.
    """
    img_b64 = base64.b64encode(image_bytes).decode()
    resp = requests.post(
        "https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting",
        # Passing the token via ``params`` lets requests URL-encode it.
        params={"access_token": token},
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={"image": img_b64, "language_type": "ENG"},
        timeout=timeout,
    )
    return resp.json().get("words_result", [])

# —— 文本高亮 —— #
def highlight_brackets(text):
    """Convert bracket-annotated plain text into highlighted HTML.

    ``[...]`` spans are wrapped in ``highlight-bracket-green`` and ``(...)``
    spans in ``highlight-bracket``; the brackets themselves are dropped.
    Newlines become ``<br>``.

    The text is HTML-escaped first, because it is inserted into a page as-is:
    a stray ``<``, ``>`` or ``&`` would otherwise corrupt the markup or
    inject tags.

    Args:
        text: Plain text using ``()`` and ``[]`` as annotation markers.

    Returns:
        An HTML fragment.
    """
    import html  # local import: the file's import header does not provide it

    # Escape before inserting our own tags so only the spans below are markup.
    # quote=False leaves quotes untouched; the text never lands in an attribute.
    safe = html.escape(text, quote=False)

    # Square brackets -> green; round brackets -> red.
    safe = re.sub(r'\[([^\[\]]+)\]', r'<span class="highlight-bracket-green">\1</span>', safe)
    safe = re.sub(r'\(([^\(\)]+)\)', r'<span class="highlight-bracket">\1</span>', safe)
    return safe.replace("\n", "<br>")

# —— 主处理函数 —— #
def _read_upload(image):
    """Return the bytes of an upload given as a file object or a path."""
    if hasattr(image, "read"):
        return image.read()
    # NOTE(review): some Gradio versions hand gr.File values to the callback
    # as a path string (possibly carrying a ``.name`` attribute) rather than
    # an open file object -- confirm against the installed version.
    path = getattr(image, "name", image)
    with open(path, "rb") as fh:
        return fh.read()


def _ask_model(prompt):
    """Send one single-turn user prompt to gpt-4o-mini and return the reply text."""
    reply = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )
    return reply.choices[0].message.content


def process(image):
    """OCR an essay photo, have the model correct it, and write a report.

    Steps: OCR the upload, ask the model to tidy the text, correct it,
    explain the corrections, grade it and rewrite it, then render everything
    through the HTML template and export HTML and PDF files.

    Args:
        image: The value Gradio passes for the uploaded file (a file object
            with ``read()`` or a filesystem path).

    Returns:
        A tuple ``(full_html, html_path, pdf_path)``: the rendered page and
        the paths of the two files written under ``/app/output``.

    Raises:
        gr.Error: If nothing was uploaded or OCR recognised no text.
    """
    if image is None:
        raise gr.Error("请先上传作文照片")

    # 1. OCR
    token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
    words = ocr_image(_read_upload(image), token)
    essay_text = "\n".join(w["words"] for w in words)
    if not essay_text.strip():
        # Without text the model calls below would only produce made-up output.
        raise gr.Error("未识别到文字,请检查图片或 OCR 配置")

    # 2. Tidy the formatting of the recognised text (spelling errors are kept)
    revised = _ask_model(
        "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
        "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
        "拼写错误保留:\n\n" + essay_text
    )

    # 3. Correction: originals in (), replacements in []
    corrected = _ask_model(
        "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
        "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
    )

    # 4. Itemised feedback based on the annotated correction
    review = _ask_model(
        "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n"
        + corrected
    )

    # 5. Grading
    rating = _ask_model(
        "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n"
        + revised
    )

    # 6. Model rewrite
    perfect = _ask_model(
        "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
    )

    # 7. Render HTML
    # Report id: four random digits plus a timestamp; also used as file stem.
    code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    # NOTE(review): the template path is relative to the working directory
    # while the output directory below is absolute -- confirm this is intended.
    with open("app/templates/base.html", encoding="utf-8") as f:
        tpl = Template(f.read())
    html_content = (
        "<h2>原文格式化</h2>"
        + markdown.markdown(revised)
        + "<h2>批改结果</h2>"
        + highlight_brackets(corrected)
        + "<h2>批改意见</h2>"
        + markdown.markdown(review)
        + "<h2>评分</h2>"
        + markdown.markdown(rating)
        + "<h2>优秀范文</h2>"
        + markdown.markdown(perfect)
    )
    full_html = tpl.render(code=code, content=html_content)

    # Write the HTML file and convert the same markup to PDF
    html_path = f"/app/output/{code}.html"
    pdf_path  = f"/app/output/{code}.pdf"
    os.makedirs("/app/output", exist_ok=True)
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(full_html)
    pdfkit.from_string(full_html, pdf_path, options={"enable-local-file-access":""})

    return full_html, html_path, pdf_path

# —— Gradio 接口 —— #
# Build the Gradio UI. Components are created inside the Blocks context in
# this order: instructions, upload field, HTML preview, start button, and
# the two download slots.
with gr.Blocks(title="英语作文批改") as demo:
    gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
    image_in = gr.File(file_count="single", label="上传照片")
    output_html = gr.HTML()
    btn = gr.Button("开始批改")
    file_html = gr.File(label="下载 HTML")
    file_pdf  = gr.File(label="下载 PDF")
    # process() returns (html, html_path, pdf_path), matching the three
    # outputs listed here in the same order.
    btn.click(fn=process, inputs=image_in, outputs=[output_html, file_html, file_pdf])

# Start the web server only when run as a script, on 0.0.0.0 port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)