amine_dubs
committed on
Commit
·
140ca27
1
Parent(s):
dbcd9b2
- backend/main.py +124 -40
backend/main.py
CHANGED
@@ -725,53 +725,133 @@ async def download_translated_document(request: Request):
|
|
725 |
)
|
726 |
|
727 |
elif filename.endswith('.pdf'):
|
728 |
-
# Create PDF file with proper font for Arabic
|
729 |
try:
|
730 |
import fitz # PyMuPDF
|
731 |
from io import BytesIO
|
|
|
|
|
732 |
|
733 |
-
#
|
734 |
-
|
735 |
-
page = doc.new_page()
|
736 |
|
737 |
-
#
|
738 |
-
#
|
739 |
-
|
740 |
-
arabic_fonts = ["Arial", "Arial Unicode MS", "Times New Roman", "Tahoma", "Calibri"]
|
741 |
-
for font in arabic_fonts:
|
742 |
-
try:
|
743 |
-
fontname = font
|
744 |
-
break
|
745 |
-
except:
|
746 |
-
continue
|
747 |
|
748 |
-
#
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
762 |
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
770 |
except ImportError:
|
771 |
return JSONResponse(
|
772 |
status_code=501,
|
773 |
content={"success": False, "error": "PDF creation requires PyMuPDF library"}
|
774 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
775 |
|
776 |
elif filename.endswith('.docx'):
|
777 |
# Create DOCX file with proper encoding for Arabic
|
@@ -784,8 +864,13 @@ async def download_translated_document(request: Request):
|
|
784 |
|
785 |
# Add a paragraph with the translated content
|
786 |
p = doc.add_paragraph()
|
787 |
-
# Set paragraph direction to right-to-left for Arabic
|
788 |
-
|
|
|
|
|
|
|
|
|
|
|
789 |
p.add_run(content)
|
790 |
|
791 |
# Save to bytes
|
@@ -806,11 +891,10 @@ async def download_translated_document(request: Request):
|
|
806 |
content={"success": False, "error": "DOCX creation requires python-docx library"}
|
807 |
)
|
808 |
except Exception as e:
|
809 |
-
|
810 |
-
print(f"Error in DOCX creation: {str(e)}")
|
811 |
traceback.print_exc()
|
812 |
return JSONResponse(
|
813 |
-
status_code=500,
|
814 |
content={"success": False, "error": f"DOCX creation error: {str(e)}"}
|
815 |
)
|
816 |
|
|
|
725 |
)
|
726 |
|
727 |
elif filename.endswith('.pdf'):
|
|
|
728 |
try:
|
729 |
import fitz # PyMuPDF
|
730 |
from io import BytesIO
|
731 |
+
import tempfile
|
732 |
+
import os
|
733 |
|
734 |
+
# Check if text contains Arabic
|
735 |
+
has_arabic = any('\u0600' <= c <= '\u06FF' for c in content)
|
|
|
736 |
|
737 |
+
# For Arabic PDFs, we'll try a special approach:
|
738 |
+
# 1. Create a temporary HTML file with the Arabic text
|
739 |
+
# 2. Use PyMuPDF to convert the HTML to PDF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
740 |
|
741 |
+
# Create a temporary HTML file with proper RTL styling
|
742 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as temp_html:
|
743 |
+
html_content = f"""<!DOCTYPE html>
|
744 |
+
<html>
|
745 |
+
<head>
|
746 |
+
<meta charset="UTF-8">
|
747 |
+
<style>
|
748 |
+
@page {{ size: A4; margin: 2cm; }}
|
749 |
+
body {{
|
750 |
+
font-family: Arial, 'Times New Roman', sans-serif;
|
751 |
+
font-size: 12pt;
|
752 |
+
{'direction: rtl; text-align: right;' if has_arabic else 'direction: ltr; text-align: left;'}
|
753 |
+
}}
|
754 |
+
</style>
|
755 |
+
</head>
|
756 |
+
<body>
|
757 |
+
<div>
|
758 |
+
{content.replace('\n', '<br>')}
|
759 |
+
</div>
|
760 |
+
</body>
|
761 |
+
</html>"""
|
762 |
+
temp_html.write(html_content)
|
763 |
+
html_path = temp_html.name
|
764 |
|
765 |
+
try:
|
766 |
+
# Create a new PDF document from the HTML
|
767 |
+
doc = fitz.open()
|
768 |
+
pdf_bytes = BytesIO()
|
769 |
+
|
770 |
+
try:
|
771 |
+
# Try converting HTML to PDF using PyMuPDF
|
772 |
+
html_doc = fitz.open("html", html_content)
|
773 |
+
doc.insert_pdf(html_doc)
|
774 |
+
except Exception as html_err:
|
775 |
+
print(f"HTML conversion failed: {html_err}")
|
776 |
+
|
777 |
+
# Fallback: Create a simple PDF with basic text
|
778 |
+
page = doc.new_page()
|
779 |
+
|
780 |
+
# For right-to-left text like Arabic
|
781 |
+
if has_arabic:
|
782 |
+
# Add text as blocks from right to left
|
783 |
+
blocks = content.split("\n")
|
784 |
+
y_pos = 50
|
785 |
+
for block in blocks:
|
786 |
+
if block.strip():
|
787 |
+
rect = fitz.Rect(50, y_pos, page.rect.width - 50, y_pos + 20)
|
788 |
+
page.insert_text(rect.tl, block, fontsize=11,
|
789 |
+
fontname="helv", right_to_left=True)
|
790 |
+
y_pos += 20
|
791 |
+
else:
|
792 |
+
# For left-to-right text
|
793 |
+
page.insert_text((50, 50), content, fontsize=11)
|
794 |
+
|
795 |
+
# Save the PDF
|
796 |
+
doc.save(pdf_bytes)
|
797 |
+
doc.close()
|
798 |
+
|
799 |
+
# Clean up the temporary HTML file
|
800 |
+
try:
|
801 |
+
os.unlink(html_path)
|
802 |
+
except:
|
803 |
+
pass
|
804 |
+
|
805 |
+
# Return as attachment
|
806 |
+
from fastapi.responses import Response
|
807 |
+
return Response(
|
808 |
+
content=pdf_bytes.getvalue(),
|
809 |
+
media_type="application/pdf",
|
810 |
+
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
811 |
+
)
|
812 |
+
except Exception as pdf_err:
|
813 |
+
print(f"PDF generation error: {pdf_err}")
|
814 |
+
traceback.print_exc()
|
815 |
+
|
816 |
+
# If PDF generation failed, try an even simpler approach
|
817 |
+
# Just create a plain text PDF
|
818 |
+
try:
|
819 |
+
doc = fitz.open()
|
820 |
+
page = doc.new_page()
|
821 |
+
font = "helv" # Built-in font with reasonable Unicode support
|
822 |
+
page.insert_text((50, 50), content, fontname=font, fontsize=11)
|
823 |
+
|
824 |
+
pdf_bytes = BytesIO()
|
825 |
+
doc.save(pdf_bytes)
|
826 |
+
doc.close()
|
827 |
+
|
828 |
+
return Response(
|
829 |
+
content=pdf_bytes.getvalue(),
|
830 |
+
media_type="application/pdf",
|
831 |
+
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
832 |
+
)
|
833 |
+
except:
|
834 |
+
# If all PDF approaches fail, fall back to plain text
|
835 |
+
return Response(
|
836 |
+
content=content.encode('utf-8'),
|
837 |
+
media_type="text/plain; charset=utf-8",
|
838 |
+
headers={
|
839 |
+
"Content-Disposition": f"attachment; filename={filename.replace('.pdf', '.txt')}",
|
840 |
+
"Content-Type": "text/plain; charset=utf-8"
|
841 |
+
}
|
842 |
+
)
|
843 |
except ImportError:
|
844 |
return JSONResponse(
|
845 |
status_code=501,
|
846 |
content={"success": False, "error": "PDF creation requires PyMuPDF library"}
|
847 |
)
|
848 |
+
except Exception as e:
|
849 |
+
print(f"PDF creation error: {str(e)}")
|
850 |
+
traceback.print_exc()
|
851 |
+
return JSONResponse(
|
852 |
+
status_code=500,
|
853 |
+
content={"success": False, "error": f"PDF creation failed: {str(e)}"}
|
854 |
+
)
|
855 |
|
856 |
elif filename.endswith('.docx'):
|
857 |
# Create DOCX file with proper encoding for Arabic
|
|
|
864 |
|
865 |
# Add a paragraph with the translated content
|
866 |
p = doc.add_paragraph()
|
867 |
+
# Set paragraph direction to right-to-left for Arabic if needed
|
868 |
+
is_arabic = any('\u0600' <= c <= '\u06FF' for c in content)
|
869 |
+
if is_arabic:
|
870 |
+
try:
|
871 |
+
p._element.get_or_add_pPr().set('bidi', True) # Set RTL direction
|
872 |
+
except:
|
873 |
+
pass # If this fails, continue with default direction
|
874 |
p.add_run(content)
|
875 |
|
876 |
# Save to bytes
|
|
|
891 |
content={"success": False, "error": "DOCX creation requires python-docx library"}
|
892 |
)
|
893 |
except Exception as e:
|
894 |
+
print(f"DOCX creation error: {str(e)}")
|
|
|
895 |
traceback.print_exc()
|
896 |
return JSONResponse(
|
897 |
+
status_code=500,
|
898 |
content={"success": False, "error": f"DOCX creation error: {str(e)}"}
|
899 |
)
|
900 |
|