leonarb committed on
Commit
5201e8a
·
verified ·
1 Parent(s): 71a824e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -43,7 +43,6 @@ def process_pdf_to_epub(pdf_file, title, author):
43
  anchor_text = get_anchor_text(pdf_path, page_num, pdf_engine="pdfreport", target_length=4000)
44
  prompt = build_finetuning_prompt(anchor_text)
45
 
46
- # Format prompt
47
  messages = [
48
  {
49
  "role": "user",
@@ -74,11 +73,14 @@ def process_pdf_to_epub(pdf_file, title, author):
74
  prompt_length = inputs["input_ids"].shape[1]
75
  new_tokens = output[:, prompt_length:]
76
 
77
- try:
78
- decoded_list = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
79
- decoded = decoded_list[0].strip() if decoded_list else "[No output generated]"
80
- except Exception as decode_error:
81
- decoded = f"[Decoding error on page {page_num}: {str(decode_error)}]"
 
 
 
82
 
83
  # Create chapter
84
  chapter = epub.EpubHtml(title=f"Page {page_num}", file_name=f"page_{page_num}.xhtml", lang="en")
 
43
  anchor_text = get_anchor_text(pdf_path, page_num, pdf_engine="pdfreport", target_length=4000)
44
  prompt = build_finetuning_prompt(anchor_text)
45
 
 
46
  messages = [
47
  {
48
  "role": "user",
 
73
  prompt_length = inputs["input_ids"].shape[1]
74
  new_tokens = output[:, prompt_length:]
75
 
76
+ except Exception as processing_error:
77
+ decoded = f"[Processing error on page {page_num}: {str(processing_error)}]"
78
+ else:
79
+ try:
80
+ decoded_list = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
81
+ decoded = decoded_list[0].strip() if decoded_list else "[No output generated]"
82
+ except Exception as decode_error:
83
+ decoded = f"[Decoding error on page {page_num}: {str(decode_error)}]"
84
 
85
  # Create chapter
86
  chapter = epub.EpubHtml(title=f"Page {page_num}", file_name=f"page_{page_num}.xhtml", lang="en")