amine_dubs
committed on
Commit
·
52c54ab
1
Parent(s):
1913c15
- backend/main.py +128 -46
- backend/requirements.txt +2 -1
backend/main.py
CHANGED
@@ -728,30 +728,21 @@ async def download_translated_document(request: Request):
|
|
728 |
|
729 |
elif filename.endswith('.pdf'):
|
730 |
try:
|
731 |
-
# For PDF files,
|
732 |
-
# Try to create a simple PDF with reportlab, which should be available
|
733 |
try:
|
|
|
734 |
from reportlab.pdfgen import canvas
|
735 |
from reportlab.lib.pagesizes import letter
|
736 |
from io import BytesIO
|
737 |
-
|
738 |
-
|
739 |
-
from reportlab.lib.colors import black
|
740 |
|
741 |
# Create a PDF in memory
|
742 |
buffer = BytesIO()
|
743 |
c = canvas.Canvas(buffer, pagesize=letter)
|
744 |
|
745 |
-
#
|
746 |
-
|
747 |
-
# Try to use a system font that supports Arabic
|
748 |
-
pdfmetrics.registerFont(TTFont('Arabic', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'))
|
749 |
-
font_name = 'Arabic'
|
750 |
-
except:
|
751 |
-
# Default to built-in Helvetica which has limited Arabic support
|
752 |
-
font_name = 'Helvetica'
|
753 |
-
|
754 |
-
# Set font
|
755 |
c.setFont(font_name, 12)
|
756 |
|
757 |
# Check if text contains Arabic
|
@@ -761,64 +752,154 @@ async def download_translated_document(request: Request):
|
|
761 |
lines = content.split('\n')
|
762 |
y_position = 750 # Start from top
|
763 |
|
764 |
-
# Draw text
|
765 |
for line in lines:
|
766 |
if line.strip():
|
767 |
-
# For Arabic,
|
768 |
if has_arabic:
|
769 |
-
#
|
770 |
text_width = c.stringWidth(line, font_name, 12)
|
771 |
-
|
|
|
772 |
else:
|
773 |
-
# Left-
|
774 |
-
c.drawString(
|
|
|
|
|
775 |
y_position -= 14
|
776 |
|
777 |
-
# Add a new page if
|
778 |
-
if y_position <
|
779 |
c.showPage()
|
|
|
780 |
y_position = 750
|
781 |
|
|
|
782 |
c.save()
|
783 |
|
784 |
# Get PDF content
|
785 |
pdf_content = buffer.getvalue()
|
786 |
buffer.close()
|
787 |
|
788 |
-
# Return PDF
|
789 |
return Response(
|
790 |
content=pdf_content,
|
791 |
media_type="application/pdf",
|
792 |
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
793 |
)
|
|
|
794 |
except ImportError:
|
795 |
-
|
796 |
-
|
|
|
797 |
from io import BytesIO
|
|
|
|
|
|
|
798 |
|
799 |
-
#
|
800 |
-
|
801 |
-
|
802 |
-
|
803 |
-
# Add text - keep it very simple
|
804 |
-
page.insert_text((72, 72), content)
|
805 |
-
|
806 |
-
# Save PDF
|
807 |
-
pdf_bytes = BytesIO()
|
808 |
-
doc.save(pdf_bytes)
|
809 |
-
pdf_bytes.seek(0)
|
810 |
-
doc.close()
|
811 |
|
812 |
-
|
813 |
-
|
814 |
-
media_type="application/pdf",
|
815 |
-
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
816 |
-
)
|
817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
818 |
except Exception as e:
|
819 |
-
print(f"PDF creation error: {
|
820 |
traceback.print_exc()
|
821 |
-
|
|
|
822 |
return Response(
|
823 |
content=content.encode('utf-8'),
|
824 |
media_type="text/plain; charset=utf-8",
|
@@ -827,8 +908,9 @@ async def download_translated_document(request: Request):
|
|
827 |
"Content-Type": "text/plain; charset=utf-8"
|
828 |
}
|
829 |
)
|
|
|
830 |
except Exception as e:
|
831 |
-
print(f"PDF creation error: {
|
832 |
traceback.print_exc()
|
833 |
# Return a text file as fallback
|
834 |
return Response(
|
|
|
728 |
|
729 |
elif filename.endswith('.pdf'):
|
730 |
try:
|
731 |
+
# For PDF files, try multiple approaches
|
|
|
732 |
try:
|
733 |
+
# Try ReportLab first (which handles Arabic better)
|
734 |
from reportlab.pdfgen import canvas
|
735 |
from reportlab.lib.pagesizes import letter
|
736 |
from io import BytesIO
|
737 |
+
|
738 |
+
print("Using ReportLab for PDF generation")
|
|
|
739 |
|
740 |
# Create a PDF in memory
|
741 |
buffer = BytesIO()
|
742 |
c = canvas.Canvas(buffer, pagesize=letter)
|
743 |
|
744 |
+
# Use a basic font that should work with most installations
|
745 |
+
font_name = 'Helvetica'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
746 |
c.setFont(font_name, 12)
|
747 |
|
748 |
# Check if text contains Arabic
|
|
|
752 |
lines = content.split('\n')
|
753 |
y_position = 750 # Start from top
|
754 |
|
755 |
+
# Draw text line by line
|
756 |
for line in lines:
|
757 |
if line.strip():
|
758 |
+
# For Arabic, right-align the text
|
759 |
if has_arabic:
|
760 |
+
# Get width to calculate right alignment
|
761 |
text_width = c.stringWidth(line, font_name, 12)
|
762 |
+
# Position from right margin
|
763 |
+
c.drawString(letter[0] - 72 - text_width, y_position, line)
|
764 |
else:
|
765 |
+
# Left-align for non-Arabic text
|
766 |
+
c.drawString(72, y_position, line)
|
767 |
+
|
768 |
+
# Move down for next line
|
769 |
y_position -= 14
|
770 |
|
771 |
+
# Add a new page if needed
|
772 |
+
if y_position < 72:
|
773 |
c.showPage()
|
774 |
+
c.setFont(font_name, 12)
|
775 |
y_position = 750
|
776 |
|
777 |
+
# Save the PDF to the buffer
|
778 |
c.save()
|
779 |
|
780 |
# Get PDF content
|
781 |
pdf_content = buffer.getvalue()
|
782 |
buffer.close()
|
783 |
|
784 |
+
# Return the PDF
|
785 |
return Response(
|
786 |
content=pdf_content,
|
787 |
media_type="application/pdf",
|
788 |
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
789 |
)
|
790 |
+
|
791 |
except ImportError:
|
792 |
+
# Fall back to PyMuPDF with improved approach for Arabic
|
793 |
+
print("ReportLab not available, using PyMuPDF with improved Arabic handling")
|
794 |
+
import fitz
|
795 |
from io import BytesIO
|
796 |
+
import uuid
|
797 |
+
import os
|
798 |
+
import tempfile
|
799 |
|
800 |
+
# For PyMuPDF, we'll take a different approach for Arabic text:
|
801 |
+
# 1. Create a temporary HTML file with the Arabic text and proper RTL styling
|
802 |
+
# 2. Convert it to PDF using PyMuPDF's HTML parser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
803 |
|
804 |
+
# Determine if we have Arabic text
|
805 |
+
has_arabic = any('\u0600' <= ch <= '\u06FF' for ch in content)
|
|
|
|
|
|
|
806 |
|
807 |
+
if has_arabic:
|
808 |
+
# Create a temporary HTML file with RTL direction for Arabic
|
809 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as temp_file:
|
810 |
+
html_content = f"""<!DOCTYPE html>
|
811 |
+
<html dir="rtl" lang="ar">
|
812 |
+
<head>
|
813 |
+
<meta charset="UTF-8">
|
814 |
+
<title>Translated Document</title>
|
815 |
+
<style>
|
816 |
+
body {{
|
817 |
+
font-family: Arial, sans-serif;
|
818 |
+
direction: rtl;
|
819 |
+
text-align: right;
|
820 |
+
margin: 1.5cm;
|
821 |
+
font-size: 12pt;
|
822 |
+
line-height: 1.5;
|
823 |
+
}}
|
824 |
+
</style>
|
825 |
+
</head>
|
826 |
+
<body>
|
827 |
+
{content.replace('\n', '<br>')}
|
828 |
+
</body>
|
829 |
+
</html>"""
|
830 |
+
temp_file.write(html_content)
|
831 |
+
temp_html_path = temp_file.name
|
832 |
+
|
833 |
+
try:
|
834 |
+
# Convert HTML to PDF
|
835 |
+
doc = fitz.open()
|
836 |
+
|
837 |
+
# Load the HTML file as a separate document and insert it
|
838 |
+
html_doc = fitz.open(temp_html_path)
|
839 |
+
doc.insert_pdf(html_doc)
|
840 |
+
html_doc.close()
|
841 |
+
|
842 |
+
# Save to memory
|
843 |
+
pdf_bytes = BytesIO()
|
844 |
+
doc.save(pdf_bytes)
|
845 |
+
doc.close()
|
846 |
+
|
847 |
+
# Clean up temporary file
|
848 |
+
try:
|
849 |
+
os.unlink(temp_html_path)
|
850 |
+
except:
|
851 |
+
pass
|
852 |
+
|
853 |
+
# Return the PDF
|
854 |
+
return Response(
|
855 |
+
content=pdf_bytes.getvalue(),
|
856 |
+
media_type="application/pdf",
|
857 |
+
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
858 |
+
)
|
859 |
+
except Exception as html_err:
|
860 |
+
print(f"HTML conversion failed: {html_err}")
|
861 |
+
# Clean up temp file if it exists
|
862 |
+
try:
|
863 |
+
os.unlink(temp_html_path)
|
864 |
+
except:
|
865 |
+
pass
|
866 |
+
|
867 |
+
# Fall back to text file since all PDF attempts failed
|
868 |
+
return Response(
|
869 |
+
content=content.encode('utf-8'),
|
870 |
+
media_type="text/plain; charset=utf-8",
|
871 |
+
headers={
|
872 |
+
"Content-Disposition": f"attachment; filename={filename.replace('.pdf', '.txt')}",
|
873 |
+
"Content-Type": "text/plain; charset=utf-8"
|
874 |
+
}
|
875 |
+
)
|
876 |
+
else:
|
877 |
+
# For non-Arabic text, use the simpler PDF creation method
|
878 |
+
doc = fitz.open()
|
879 |
+
page = doc.new_page()
|
880 |
+
|
881 |
+
# Add text content
|
882 |
+
rect = fitz.Rect(72, 72, page.rect.width-72, page.rect.height-72)
|
883 |
+
page.insert_text((72, 72), content, fontsize=11)
|
884 |
+
|
885 |
+
# Save to memory
|
886 |
+
pdf_bytes = BytesIO()
|
887 |
+
doc.save(pdf_bytes)
|
888 |
+
pdf_bytes.seek(0)
|
889 |
+
doc.close()
|
890 |
+
|
891 |
+
# Return the PDF
|
892 |
+
return Response(
|
893 |
+
content=pdf_bytes.getvalue(),
|
894 |
+
media_type="application/pdf",
|
895 |
+
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
896 |
+
)
|
897 |
+
|
898 |
except Exception as e:
|
899 |
+
print(f"PDF creation error with advanced methods: {e}")
|
900 |
traceback.print_exc()
|
901 |
+
|
902 |
+
# Fall back to text file if all PDF attempts fail
|
903 |
return Response(
|
904 |
content=content.encode('utf-8'),
|
905 |
media_type="text/plain; charset=utf-8",
|
|
|
908 |
"Content-Type": "text/plain; charset=utf-8"
|
909 |
}
|
910 |
)
|
911 |
+
|
912 |
except Exception as e:
|
913 |
+
print(f"Overall PDF creation error: {e}")
|
914 |
traceback.print_exc()
|
915 |
# Return a text file as fallback
|
916 |
return Response(
|
backend/requirements.txt
CHANGED
@@ -13,4 +13,5 @@ sentencepiece
|
|
13 |
tensorflow
|
14 |
accelerate
|
15 |
langdetect
|
16 |
-
hf_xet
|
|
|
|
13 |
tensorflow
|
14 |
accelerate
|
15 |
langdetect
|
16 |
+
hf_xet
|
17 |
+
reportlab # Added for PDF generation with Arabic support
|