Tbb1111 committed on
Commit
7c5b58c
·
verified ·
1 Parent(s): 7b6f181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -41
app.py CHANGED
@@ -1,8 +1,5 @@
1
  import gradio as gr
2
  from transformers import T5ForConditionalGeneration, T5Tokenizer
3
- import torch
4
- import fitz # PyMuPDF
5
- from fpdf import FPDF
6
 
7
  # 加载 T5 模型和分词器
8
  model_name = "t5-small" # 可以根据需要调整模型大小
@@ -10,52 +7,24 @@ model = T5ForConditionalGeneration.from_pretrained(model_name)
10
  tokenizer = T5Tokenizer.from_pretrained(model_name)
11
 
12
  # 翻译功能
13
- def translate_pdf(pdf_file):
14
- # 读取 PDF 文件
15
- doc = fitz.open(pdf_file.name)
16
- text = ""
17
- for page in doc:
18
- text += page.get_text()
19
 
20
- # 为了避免输入超长,按段落拆分翻译
21
- paragraphs = text.split("\n")
22
- translated_paragraphs = []
23
-
24
- # 分批翻译每一段
25
- for paragraph in paragraphs:
26
- if len(paragraph.strip()) == 0:
27
- continue
28
- # 使用 T5 模型进行翻译
29
- inputs = tokenizer.encode("translate English to Chinese: " + paragraph, return_tensors="pt", max_length=512, truncation=True)
30
- outputs = model.generate(inputs, max_length=1024, num_beams=4, early_stopping=True)
31
- translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
32
- translated_paragraphs.append(translated_text)
33
-
34
- # 将所有翻译后的段落合并
35
- translated_text = "\n".join(translated_paragraphs)
36
-
37
- # 创建翻译后的 PDF
38
- translated_pdf = FPDF()
39
- translated_pdf.add_page()
40
- translated_pdf.set_font("Arial", size=12)
41
- translated_pdf.multi_cell(0, 10, translated_text)
42
-
43
- # 保存翻译后的 PDF
44
- translated_pdf_path = "translated_output.pdf"
45
- translated_pdf.output(translated_pdf_path)
46
-
47
- return translated_pdf_path
48
 
49
  # 创建 Gradio 界面
50
  with gr.Blocks() as demo:
51
- gr.Markdown("# PDF 英文翻译器")
52
 
53
  with gr.Row():
54
- pdf_input = gr.File(label="上传英文 PDF 文件", file_types=[".pdf"])
55
 
56
  translate_button = gr.Button("开始翻译")
57
- output_pdf = gr.File(label="下载翻译后的 PDF")
58
 
59
- translate_button.click(fn=translate_pdf, inputs=pdf_input, outputs=output_pdf)
60
 
61
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import T5ForConditionalGeneration, T5Tokenizer
 
 
 
3
 
4
  # 加载 T5 模型和分词器
5
  model_name = "t5-small" # 可以根据需要调整模型大小
 
7
  tokenizer = T5Tokenizer.from_pretrained(model_name)
8
 
9
  # 翻译功能
10
+ def translate_text(input_text):
11
+ # 使用 T5 模型进行翻译
12
+ inputs = tokenizer.encode("translate English to Chinese: " + input_text, return_tensors="pt", max_length=512, truncation=True)
13
+ outputs = model.generate(inputs, max_length=1024, num_beams=4, early_stopping=True)
14
+ translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
15
 
16
+ return translated_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # 创建 Gradio 界面
19
  with gr.Blocks() as demo:
20
+ gr.Markdown("# 英文文本翻译器")
21
 
22
  with gr.Row():
23
+ text_input = gr.Textbox(label="输入英文文本", lines=5) # 让用户输入英文文本
24
 
25
  translate_button = gr.Button("开始翻译")
26
+ output_text = gr.Textbox(label="翻译后的中文文本", lines=5) # 显示翻译后的中文文本
27
 
28
+ translate_button.click(fn=translate_text, inputs=text_input, outputs=output_text)
29
 
30
  demo.launch()