"""English essay correction service.

Pipeline: a photo of a handwritten essay is sent to the Baidu handwriting OCR
endpoint, the recognised text is cleaned, corrected, reviewed, graded and
rewritten through OpenAI chat completions, and the result is rendered to an
HTML page and a PDF that can be downloaded from a Gradio UI.
"""

import base64
import html
import os
import random
import re
from datetime import datetime

import gradio as gr
import markdown
import openai
import pdfkit
import requests
from jinja2 import Template

# --- Configuration read from the environment ---
BAIDU_API_KEY = os.getenv("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.getenv("BAIDU_SECRET_KEY", "")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
openai.api_key = OPENAI_API_KEY

BAIDU_TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token"
BAIDU_OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting"
CHAT_MODEL = "gpt-4o-mini"
TEMPLATE_PATH = "app/templates/base.html"
OUTPUT_DIR = "/app/output"
# Seconds to wait for Baidu before giving up; without a timeout a stalled
# connection would block the Gradio worker forever.
REQUEST_TIMEOUT = 30

_SQUARE_BRACKETS = re.compile(r'\[([^\[\]]+)\]')
_ROUND_BRACKETS = re.compile(r'\(([^\(\)]+)\)')


# --- Baidu OCR ---
def get_access_token(api_key, secret_key):
    """Exchange a Baidu API key / secret key pair for an OAuth access token.

    Returns the ``access_token`` field of the JSON response, or ``None`` when
    the response does not contain one (e.g. invalid credentials).
    """
    resp = requests.post(
        BAIDU_TOKEN_URL,
        params={
            "grant_type": "client_credentials",
            "client_id": api_key,
            "client_secret": secret_key,
        },
        timeout=REQUEST_TIMEOUT,
    )
    return resp.json().get("access_token")


def ocr_image(image_bytes, token):
    """Run Baidu handwriting OCR on raw image bytes.

    Args:
        image_bytes: The image file content.
        token: Access token obtained from ``get_access_token``.

    Returns:
        The ``words_result`` list of the response (empty list if absent).

    Raises:
        RuntimeError: If the response carries an ``error_code`` field.
    """
    img_b64 = base64.b64encode(image_bytes).decode()
    resp = requests.post(
        BAIDU_OCR_URL,
        params={"access_token": token},
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data={"image": img_b64, "language_type": "ENG"},
        timeout=REQUEST_TIMEOUT,
    )
    payload = resp.json()
    # Surface API failures instead of treating them as "no text found".
    if "error_code" in payload:
        raise RuntimeError(
            f"Baidu OCR failed ({payload['error_code']}): "
            f"{payload.get('error_msg', 'unknown error')}"
        )
    return payload.get("words_result", [])


# --- Text highlighting ---
def highlight_brackets(text):
    """Convert bracket annotations in *text* to coloured HTML.

    ``[...]`` segments are rendered green and ``(...)`` segments red; the
    bracket characters themselves are dropped. The text is HTML-escaped first
    because it originates from OCR / model output, and newlines become
    ``<br>``.
    """
    text = html.escape(text, quote=False)
    # Square brackets first: the inserted markup contains no parentheses, so
    # the second pass cannot match inside it.
    text = _SQUARE_BRACKETS.sub(r'<span style="color:green">\1</span>', text)
    text = _ROUND_BRACKETS.sub(r'<span style="color:red">\1</span>', text)
    return text.replace("\n", "<br>")


def _read_upload(upload):
    """Return the bytes of a Gradio file upload.

    Accepts a filesystem path, an object exposing the path as ``.name``
    (temp-file wrapper), or any object with a ``read()`` method.
    """
    if upload is None:
        raise ValueError("No image was uploaded.")
    if isinstance(upload, (str, os.PathLike)):
        path = upload
    else:
        path = getattr(upload, "name", None)
    if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
        # Reading by path avoids depending on the handle's mode or position.
        with open(path, "rb") as fh:
            return fh.read()
    if hasattr(upload, "read"):
        return upload.read()
    raise FileNotFoundError(f"Uploaded file not found: {upload!r}")


def _chat(prompt):
    """Send *prompt* as a single user message and return the reply text."""
    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
    # confirm the pinned openai version before upgrading the dependency.
    completion = openai.ChatCompletion.create(
        model=CHAT_MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content or ""


# --- Main processing function ---
def process(image):
    """Correct the essay in an uploaded photo and produce HTML and PDF reports.

    Args:
        image: The value supplied by the Gradio ``File`` input.

    Returns:
        A tuple ``(full_html, html_path, pdf_path)``.

    Raises:
        RuntimeError: If no Baidu token can be obtained, OCR fails, or no
            text is recognised in the image.
    """
    # 1. OCR
    token = get_access_token(BAIDU_API_KEY, BAIDU_SECRET_KEY)
    if not token:
        raise RuntimeError(
            "Could not obtain a Baidu access token; "
            "check BAIDU_API_KEY and BAIDU_SECRET_KEY."
        )
    words = ocr_image(_read_upload(image), token)
    essay_text = "\n".join(w["words"] for w in words)
    if not essay_text.strip():
        # Stop here rather than running five completions on an empty essay.
        raise RuntimeError("No text was recognised in the uploaded image.")

    # 2. Clean up the formatting of the original text
    fmt_prompt = (
        "请帮我整理下面的英语作文文本格式,只整理英文正文部分,"
        "保证原汁原味(明显错误空格换行、乱码、非常用字符改正),"
        "拼写错误保留:\n\n" + essay_text
    )
    revised = _chat(fmt_prompt)

    # 3. Correction
    corr_prompt = (
        "请帮我把下面的英语作文的语法错误改正,输出改正后的文章,"
        "原文错误用()括起来,修改部分用[]括起来:\n\n" + revised
    )
    corrected = _chat(corr_prompt)

    # 4. Itemised review comments
    review_prompt = (
        "下面是一份已经批改过的英语作文,请根据批注给出逐条批改意见:\n\n" + corrected
    )
    review = _chat(review_prompt)

    # 5. Grading
    rate_prompt = (
        "请按照 IELTS/CEFR 写作评价体系,从语言通顺度、连贯度、词汇与语法三维度打分并给出原因:\n\n" + revised
    )
    rating = _chat(rate_prompt)

    # 6. Model essay
    rewrite_prompt = (
        "请使用优秀表达重写下面这篇作文,加粗可供学习的部分:\n\n" + revised
    )
    perfect = _chat(rewrite_prompt)

    # 7. Render HTML
    code = f"{random.randint(0,9999):04}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    with open(TEMPLATE_PATH, encoding="utf-8") as f:
        tpl = Template(f.read())
    sections = [
        ("原文格式化", markdown.markdown(revised)),
        ("批改结果", highlight_brackets(corrected)),
        ("批改意见", markdown.markdown(review)),
        ("评分", markdown.markdown(rating)),
        ("优秀范文", markdown.markdown(perfect)),
    ]
    html_content = "".join(
        f"<h2>{title}</h2>{body}" for title, body in sections
    )
    full_html = tpl.render(code=code, content=html_content)

    # Write the report files
    html_path = f"{OUTPUT_DIR}/{code}.html"
    pdf_path = f"{OUTPUT_DIR}/{code}.pdf"
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(full_html)
    pdfkit.from_string(
        full_html, pdf_path, options={"enable-local-file-access": ""}
    )

    return full_html, html_path, pdf_path


# --- Gradio interface ---
with gr.Blocks(title="英语作文批改") as demo:
    gr.Markdown("## 上传英语作文照片,等待批改完成后下载 HTML 或 PDF")
    image_in = gr.File(file_count="single", label="上传照片")
    output_html = gr.HTML()
    btn = gr.Button("开始批改")
    file_html = gr.File(label="下载 HTML")
    file_pdf = gr.File(label="下载 PDF")
    btn.click(
        fn=process,
        inputs=image_in,
        outputs=[output_html, file_html, file_pdf],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)