|
import base64
import html
import os
import random
import re
from datetime import datetime

import gradio as gr
import markdown
import openai
import pdfkit
import requests
from jinja2 import Template
|
|
|
|
|
# Service credentials come from the environment; an unset variable yields "".
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Hand the OpenAI key to the SDK once, at import time.
openai.api_key = OPENAI_API_KEY
|
|
|
|
|
def get_access_token(api_key, secret_key, timeout=10):
    """Request an OAuth access token from the Baidu token endpoint.

    Args:
        api_key: Sent as the ``client_id`` query parameter.
        secret_key: Sent as the ``client_secret`` query parameter.
        timeout: Seconds to wait for the HTTP exchange. Without a timeout
            ``requests`` waits indefinitely on a stalled connection, which
            would hang the whole request handler.

    Returns:
        The ``access_token`` field of the JSON response, or ``None`` when
        the response carries no such field.
    """
    resp = requests.post(
        "https://aip.baidubce.com/oauth/2.0/token",
        params={
            "grant_type": "client_credentials",
            "client_id": api_key,
            "client_secret": secret_key,
        },
        timeout=timeout,
    )
    return resp.json().get("access_token")
|
|
|
def ocr_image(image_bytes, token, timeout=30):
    """Run the Baidu handwriting OCR endpoint on an image.

    Args:
        image_bytes: Raw image content; it is base64-encoded before upload.
        token: Access token appended to the endpoint URL.
        timeout: Seconds to wait for the HTTP exchange. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``words_result`` list of the JSON response, or an empty list
        when the response has no such field.
    """
    img_b64 = base64.b64encode(image_bytes).decode()
    resp = requests.post(
        f"https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token={token}",
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={"image": img_b64, "language_type": "ENG"},
        timeout=timeout,
    )
    return resp.json().get("words_result", [])
|
|
|
|
|
def highlight_brackets(text):
    """Turn bracketed correction markup into an HTML fragment.

    ``[...]`` spans are wrapped in ``<span class="highlight-bracket-green">``
    and ``(...)`` spans in ``<span class="highlight-bracket">``; the bracket
    characters themselves are dropped. Newlines become ``<br>``.

    Args:
        text: Plain text containing the bracket markup.

    Returns:
        An HTML string safe to embed in a page body.
    """
    # The text comes from OCR/LLM output, so escape it before adding our own
    # tags; otherwise a stray "<" or "&" would corrupt or inject markup.
    # Quotes are left alone because the text is never placed in an attribute.
    text = html.escape(text, quote=False)
    text = re.sub(
        r'\[([^\[\]]+)\]',
        r'<span class="highlight-bracket-green">\1</span>',
        text,
    )
    text = re.sub(
        r'\(([^\(\)]+)\)',
        r'<span class="highlight-bracket">\1</span>',
        text,
    )
    return text.replace("\n", "<br>")
|
|
|
|
|
def _read_upload(upload):
    """Return the bytes of an upload given as a path or a readable object."""
    if isinstance(upload, (str, os.PathLike)):
        with open(upload, "rb") as fh:
            return fh.read()
    return upload.read()


def _ask_model(prompt, model="gpt-4o-mini"):
    """Send *prompt* as a single user message and return the reply text."""
    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
    # confirm the pinned openai version before upgrading.
    reply = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return reply.choices[0].message.content


def process(image):
    """Grade a photographed English essay and write HTML/PDF reports.

    Steps: OCR the image, have the model tidy the text, mark grammar
    corrections, comment on them, score the essay and rewrite it; then
    render everything through ``app/templates/base.html`` and save the
    result under ``/app/output``.

    Args:
        image: The uploaded file, either a filesystem path or an object
            exposing ``read()``.

    Returns:
        A tuple ``(full_html, html_path, pdf_path)``.

    Raises:
        gr.Error: If nothing was uploaded, no access token could be
            obtained, or OCR produced no text.
    """
    if image is None:
        raise gr.Error("请先上传照片")

    token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
    if not token:
        # Without a token the OCR call cannot succeed; fail early and visibly.
        raise gr.Error("获取百度 access_token 失败,请检查 API 配置")

    words = ocr_image(_read_upload(image), token)
    if not words:
        # Avoid spending five model calls on an empty essay.
        raise gr.Error("未识别到文字,请检查图片")
    essay_text = "\n".join(w["words"] for w in words)

    # 1. Clean up the OCR layout while keeping the writer's spelling errors.
    fmt_prompt = (
        "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
        "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
        "拼写错误保留:\n\n" + essay_text
    )
    revised = _ask_model(fmt_prompt)

    # 2. Grammar correction: originals in (), replacements in [].
    corr_prompt = (
        "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
        "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
    )
    corrected = _ask_model(corr_prompt)

    # 3. Itemised comments based on the annotated text.
    review_prompt = (
        "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n"
        + corrected
    )
    review = _ask_model(review_prompt)

    # 4. Scoring of the tidied (uncorrected) essay.
    rate_prompt = (
        "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n"
        + revised
    )
    rating = _ask_model(rate_prompt)

    # 5. Model rewrite of the tidied essay.
    rewrite_prompt = (
        "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
    )
    perfect = _ask_model(rewrite_prompt)

    # Report id: 4-digit random prefix plus a timestamp.
    code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    # NOTE(review): the template path is relative while the output paths are
    # absolute; confirm the working directory used at deploy time.
    with open("app/templates/base.html", encoding="utf-8") as f:
        tpl = Template(f.read())
    html_content = (
        "<h2>原文格式化</h2>"
        + markdown.markdown(revised)
        + "<h2>批改结果</h2>"
        + highlight_brackets(corrected)
        + "<h2>批改意见</h2>"
        + markdown.markdown(review)
        + "<h2>评分</h2>"
        + markdown.markdown(rating)
        + "<h2>优秀范文</h2>"
        + markdown.markdown(perfect)
    )
    full_html = tpl.render(code=code, content=html_content)

    html_path = f"/app/output/{code}.html"
    pdf_path = f"/app/output/{code}.pdf"
    os.makedirs("/app/output", exist_ok=True)
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(full_html)
    pdfkit.from_string(full_html, pdf_path, options={"enable-local-file-access": ""})

    return full_html, html_path, pdf_path
|
|
|
|
|
# Page layout: upload box, inline HTML preview, trigger button and the two
# download slots. Components render in the order they are created here.
with gr.Blocks(title="英语作文批改") as demo:
    gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
    image_in = gr.File(file_count="single", label="上传照片")
    output_html = gr.HTML()
    btn = gr.Button("开始批改")
    file_html = gr.File(label="下载 HTML")
    file_pdf = gr.File(label="下载 PDF")
    # The three outputs line up with the tuple returned by process().
    btn.click(
        fn=process,
        inputs=image_in,
        outputs=[output_html, file_html, file_pdf],
    )


if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|