amine_dubs committed on
Commit
52c54ab
·
1 Parent(s): 1913c15
Files changed (2) hide show
  1. backend/main.py +128 -46
  2. backend/requirements.txt +2 -1
backend/main.py CHANGED
@@ -728,30 +728,21 @@ async def download_translated_document(request: Request):
728
 
729
  elif filename.endswith('.pdf'):
730
  try:
731
- # For PDF files, let's use a very basic approach with a text-based fallback
732
- # Try to create a simple PDF with reportlab, which should be available
733
  try:
 
734
  from reportlab.pdfgen import canvas
735
  from reportlab.lib.pagesizes import letter
736
  from io import BytesIO
737
- from reportlab.pdfbase import pdfmetrics
738
- from reportlab.pdfbase.ttfonts import TTFont
739
- from reportlab.lib.colors import black
740
 
741
  # Create a PDF in memory
742
  buffer = BytesIO()
743
  c = canvas.Canvas(buffer, pagesize=letter)
744
 
745
- # Try to register a font that supports Arabic
746
- try:
747
- # Try to use a system font that supports Arabic
748
- pdfmetrics.registerFont(TTFont('Arabic', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'))
749
- font_name = 'Arabic'
750
- except:
751
- # Default to built-in Helvetica which has limited Arabic support
752
- font_name = 'Helvetica'
753
-
754
- # Set font
755
  c.setFont(font_name, 12)
756
 
757
  # Check if text contains Arabic
@@ -761,64 +752,154 @@ async def download_translated_document(request: Request):
761
  lines = content.split('\n')
762
  y_position = 750 # Start from top
763
 
764
- # Draw text with proper handling for Arabic
765
  for line in lines:
766
  if line.strip():
767
- # For Arabic, we write from right to left
768
  if has_arabic:
769
- # Right-aligned text
770
  text_width = c.stringWidth(line, font_name, 12)
771
- c.drawString(letter[0] - 50 - text_width, y_position, line)
 
772
  else:
773
- # Left-aligned text
774
- c.drawString(50, y_position, line)
 
 
775
  y_position -= 14
776
 
777
- # Add a new page if we reach the bottom
778
- if y_position < 50:
779
  c.showPage()
 
780
  y_position = 750
781
 
 
782
  c.save()
783
 
784
  # Get PDF content
785
  pdf_content = buffer.getvalue()
786
  buffer.close()
787
 
788
- # Return PDF
789
  return Response(
790
  content=pdf_content,
791
  media_type="application/pdf",
792
  headers={"Content-Disposition": f"attachment; filename={filename}"}
793
  )
 
794
  except ImportError:
795
- print("Reportlab not available, trying with PyMuPDF")
796
- import fitz # PyMuPDF
 
797
  from io import BytesIO
 
 
 
798
 
799
- # Create a new PDF
800
- doc = fitz.open()
801
- page = doc.new_page()
802
-
803
- # Add text - keep it very simple
804
- page.insert_text((72, 72), content)
805
-
806
- # Save PDF
807
- pdf_bytes = BytesIO()
808
- doc.save(pdf_bytes)
809
- pdf_bytes.seek(0)
810
- doc.close()
811
 
812
- return Response(
813
- content=pdf_bytes.getvalue(),
814
- media_type="application/pdf",
815
- headers={"Content-Disposition": f"attachment; filename={filename}"}
816
- )
817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
  except Exception as e:
819
- print(f"PDF creation error: {str(e)}")
820
  traceback.print_exc()
821
- # Fallback to text file
 
822
  return Response(
823
  content=content.encode('utf-8'),
824
  media_type="text/plain; charset=utf-8",
@@ -827,8 +908,9 @@ async def download_translated_document(request: Request):
827
  "Content-Type": "text/plain; charset=utf-8"
828
  }
829
  )
 
830
  except Exception as e:
831
- print(f"PDF creation error: {str(e)}")
832
  traceback.print_exc()
833
  # Return a text file as fallback
834
  return Response(
 
728
 
729
  elif filename.endswith('.pdf'):
730
  try:
731
+ # For PDF files, try multiple approaches
 
732
  try:
733
+ # Try ReportLab first (which handles Arabic better)
734
  from reportlab.pdfgen import canvas
735
  from reportlab.lib.pagesizes import letter
736
  from io import BytesIO
737
+
738
+ print("Using ReportLab for PDF generation")
 
739
 
740
  # Create a PDF in memory
741
  buffer = BytesIO()
742
  c = canvas.Canvas(buffer, pagesize=letter)
743
 
744
+ # Use a basic font that should work with most installations
745
+ font_name = 'Helvetica'
 
 
 
 
 
 
 
 
746
  c.setFont(font_name, 12)
747
 
748
  # Check if text contains Arabic
 
752
  lines = content.split('\n')
753
  y_position = 750 # Start from top
754
 
755
+ # Draw text line by line
756
  for line in lines:
757
  if line.strip():
758
+ # For Arabic, right-align the text
759
  if has_arabic:
760
+ # Get width to calculate right alignment
761
  text_width = c.stringWidth(line, font_name, 12)
762
+ # Position from right margin
763
+ c.drawString(letter[0] - 72 - text_width, y_position, line)
764
  else:
765
+ # Left-align for non-Arabic text
766
+ c.drawString(72, y_position, line)
767
+
768
+ # Move down for next line
769
  y_position -= 14
770
 
771
+ # Add a new page if needed
772
+ if y_position < 72:
773
  c.showPage()
774
+ c.setFont(font_name, 12)
775
  y_position = 750
776
 
777
+ # Save the PDF to the buffer
778
  c.save()
779
 
780
  # Get PDF content
781
  pdf_content = buffer.getvalue()
782
  buffer.close()
783
 
784
+ # Return the PDF
785
  return Response(
786
  content=pdf_content,
787
  media_type="application/pdf",
788
  headers={"Content-Disposition": f"attachment; filename={filename}"}
789
  )
790
+
791
  except ImportError:
792
+ # Fall back to PyMuPDF with improved approach for Arabic
793
+ print("ReportLab not available, using PyMuPDF with improved Arabic handling")
794
+ import fitz
795
  from io import BytesIO
796
+ import uuid
797
+ import os
798
+ import tempfile
799
 
800
+ # For PyMuPDF, we'll take a different approach for Arabic text:
801
+ # 1. Create a temporary HTML file with the Arabic text and proper RTL styling
802
+ # 2. Convert it to PDF using PyMuPDF's HTML parser
 
 
 
 
 
 
 
 
 
803
 
804
+ # Determine if we have Arabic text
805
+ has_arabic = any('\u0600' <= ch <= '\u06FF' for ch in content)
 
 
 
806
 
807
+ if has_arabic:
808
+ # Create a temporary HTML file with RTL direction for Arabic
809
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as temp_file:
810
+ html_content = f"""<!DOCTYPE html>
811
+ <html dir="rtl" lang="ar">
812
+ <head>
813
+ <meta charset="UTF-8">
814
+ <title>Translated Document</title>
815
+ <style>
816
+ body {{
817
+ font-family: Arial, sans-serif;
818
+ direction: rtl;
819
+ text-align: right;
820
+ margin: 1.5cm;
821
+ font-size: 12pt;
822
+ line-height: 1.5;
823
+ }}
824
+ </style>
825
+ </head>
826
+ <body>
827
+ {content.replace('\n', '<br>')}
828
+ </body>
829
+ </html>"""
830
+ temp_file.write(html_content)
831
+ temp_html_path = temp_file.name
832
+
833
+ try:
834
+ # Convert HTML to PDF
835
+ doc = fitz.open()
836
+
837
+ # Load the HTML file as a separate document and insert it
838
+ html_doc = fitz.open(temp_html_path)
839
+ doc.insert_pdf(html_doc)
840
+ html_doc.close()
841
+
842
+ # Save to memory
843
+ pdf_bytes = BytesIO()
844
+ doc.save(pdf_bytes)
845
+ doc.close()
846
+
847
+ # Clean up temporary file
848
+ try:
849
+ os.unlink(temp_html_path)
850
+ except:
851
+ pass
852
+
853
+ # Return the PDF
854
+ return Response(
855
+ content=pdf_bytes.getvalue(),
856
+ media_type="application/pdf",
857
+ headers={"Content-Disposition": f"attachment; filename={filename}"}
858
+ )
859
+ except Exception as html_err:
860
+ print(f"HTML conversion failed: {html_err}")
861
+ # Clean up temp file if it exists
862
+ try:
863
+ os.unlink(temp_html_path)
864
+ except:
865
+ pass
866
+
867
+ # Fall back to text file since all PDF attempts failed
868
+ return Response(
869
+ content=content.encode('utf-8'),
870
+ media_type="text/plain; charset=utf-8",
871
+ headers={
872
+ "Content-Disposition": f"attachment; filename={filename.replace('.pdf', '.txt')}",
873
+ "Content-Type": "text/plain; charset=utf-8"
874
+ }
875
+ )
876
+ else:
877
+ # For non-Arabic text, use the simpler PDF creation method
878
+ doc = fitz.open()
879
+ page = doc.new_page()
880
+
881
+ # Add text content
882
+ rect = fitz.Rect(72, 72, page.rect.width-72, page.rect.height-72)
883
+ page.insert_text((72, 72), content, fontsize=11)
884
+
885
+ # Save to memory
886
+ pdf_bytes = BytesIO()
887
+ doc.save(pdf_bytes)
888
+ pdf_bytes.seek(0)
889
+ doc.close()
890
+
891
+ # Return the PDF
892
+ return Response(
893
+ content=pdf_bytes.getvalue(),
894
+ media_type="application/pdf",
895
+ headers={"Content-Disposition": f"attachment; filename={filename}"}
896
+ )
897
+
898
  except Exception as e:
899
+ print(f"PDF creation error with advanced methods: {e}")
900
  traceback.print_exc()
901
+
902
+ # Fall back to text file if all PDF attempts fail
903
  return Response(
904
  content=content.encode('utf-8'),
905
  media_type="text/plain; charset=utf-8",
 
908
  "Content-Type": "text/plain; charset=utf-8"
909
  }
910
  )
911
+
912
  except Exception as e:
913
+ print(f"Overall PDF creation error: {e}")
914
  traceback.print_exc()
915
  # Return a text file as fallback
916
  return Response(
backend/requirements.txt CHANGED
@@ -13,4 +13,5 @@ sentencepiece
13
  tensorflow
14
  accelerate
15
  langdetect
16
- hf_xet
 
 
13
  tensorflow
14
  accelerate
15
  langdetect
16
+ hf_xet
17
+ reportlab # Added for PDF generation with Arabic support