Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -43,7 +43,6 @@ def process_pdf_to_epub(pdf_file, title, author):
|
|
43 |
anchor_text = get_anchor_text(pdf_path, page_num, pdf_engine="pdfreport", target_length=4000)
|
44 |
prompt = build_finetuning_prompt(anchor_text)
|
45 |
|
46 |
-
# Format prompt
|
47 |
messages = [
|
48 |
{
|
49 |
"role": "user",
|
@@ -74,11 +73,14 @@ def process_pdf_to_epub(pdf_file, title, author):
|
|
74 |
prompt_length = inputs["input_ids"].shape[1]
|
75 |
new_tokens = output[:, prompt_length:]
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
82 |
|
83 |
# Create chapter
|
84 |
chapter = epub.EpubHtml(title=f"Page {page_num}", file_name=f"page_{page_num}.xhtml", lang="en")
|
|
|
43 |
anchor_text = get_anchor_text(pdf_path, page_num, pdf_engine="pdfreport", target_length=4000)
|
44 |
prompt = build_finetuning_prompt(anchor_text)
|
45 |
|
|
|
46 |
messages = [
|
47 |
{
|
48 |
"role": "user",
|
|
|
73 |
prompt_length = inputs["input_ids"].shape[1]
|
74 |
new_tokens = output[:, prompt_length:]
|
75 |
|
76 |
+
except Exception as processing_error:
|
77 |
+
decoded = f"[Processing error on page {page_num}: {str(processing_error)}]"
|
78 |
+
else:
|
79 |
+
try:
|
80 |
+
decoded_list = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
|
81 |
+
decoded = decoded_list[0].strip() if decoded_list else "[No output generated]"
|
82 |
+
except Exception as decode_error:
|
83 |
+
decoded = f"[Decoding error on page {page_num}: {str(decode_error)}]"
|
84 |
|
85 |
# Create chapter
|
86 |
chapter = epub.EpubHtml(title=f"Page {page_num}", file_name=f"page_{page_num}.xhtml", lang="en")
|