amine_dubs committed on
Commit
140ca27
·
1 Parent(s): dbcd9b2
Files changed (1) hide show
  1. backend/main.py +124 -40
backend/main.py CHANGED
@@ -725,53 +725,133 @@ async def download_translated_document(request: Request):
725
  )
726
 
727
  elif filename.endswith('.pdf'):
728
- # Create PDF file with proper font for Arabic
729
  try:
730
  import fitz # PyMuPDF
731
  from io import BytesIO
 
 
732
 
733
- # Create a new PDF document
734
- doc = fitz.open()
735
- page = doc.new_page()
736
 
737
- # Use a font that supports Arabic
738
- # First try to find an installed Arabic font
739
- fontname = None
740
- arabic_fonts = ["Arial", "Arial Unicode MS", "Times New Roman", "Tahoma", "Calibri"]
741
- for font in arabic_fonts:
742
- try:
743
- fontname = font
744
- break
745
- except:
746
- continue
747
 
748
- # Set font parameters explicitly for Arabic text
749
- fontsize = 11
750
- # Insert text into the PDF with specific parameters for right-to-left text
751
- text_rect = fitz.Rect(50, 50, page.rect.width - 50, page.rect.height - 50)
752
-
753
- # Create a text writer with RTL direction for Arabic
754
- tw = fitz.TextWriter(page.rect)
755
- tw.append(text_rect.tl, content, fontname=fontname, fontsize=fontsize)
756
- tw.write_text(page)
757
-
758
- # Save to bytes
759
- pdf_bytes = BytesIO()
760
- doc.save(pdf_bytes)
761
- doc.close()
 
 
 
 
 
 
 
 
 
762
 
763
- # Return as attachment with proper encoding
764
- from fastapi.responses import Response
765
- return Response(
766
- content=pdf_bytes.getvalue(),
767
- media_type="application/pdf",
768
- headers={"Content-Disposition": f"attachment; filename={filename}"}
769
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
770
  except ImportError:
771
  return JSONResponse(
772
  status_code=501,
773
  content={"success": False, "error": "PDF creation requires PyMuPDF library"}
774
  )
 
 
 
 
 
 
 
775
 
776
  elif filename.endswith('.docx'):
777
  # Create DOCX file with proper encoding for Arabic
@@ -784,8 +864,13 @@ async def download_translated_document(request: Request):
784
 
785
  # Add a paragraph with the translated content
786
  p = doc.add_paragraph()
787
- # Set paragraph direction to right-to-left for Arabic
788
- p._element.get_or_add_pPr().set('bidi', True) # Set RTL direction
 
 
 
 
 
789
  p.add_run(content)
790
 
791
  # Save to bytes
@@ -806,11 +891,10 @@ async def download_translated_document(request: Request):
806
  content={"success": False, "error": "DOCX creation requires python-docx library"}
807
  )
808
  except Exception as e:
809
- # Additional error info for DOCX creation
810
- print(f"Error in DOCX creation: {str(e)}")
811
  traceback.print_exc()
812
  return JSONResponse(
813
- status_code=500,
814
  content={"success": False, "error": f"DOCX creation error: {str(e)}"}
815
  )
816
 
 
725
  )
726
 
727
  elif filename.endswith('.pdf'):
 
728
  try:
729
  import fitz # PyMuPDF
730
  from io import BytesIO
731
+ import tempfile
732
+ import os
733
 
734
+ # Check if text contains Arabic
735
+ has_arabic = any('\u0600' <= c <= '\u06FF' for c in content)
 
736
 
737
+ # For Arabic PDFs, we'll try a special approach:
738
+ # 1. Create a temporary HTML file with the Arabic text
739
+ # 2. Use PyMuPDF to convert the HTML to PDF
 
 
 
 
 
 
 
740
 
741
+ # Create a temporary HTML file with proper RTL styling
742
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as temp_html:
743
+ html_content = f"""<!DOCTYPE html>
744
+ <html>
745
+ <head>
746
+ <meta charset="UTF-8">
747
+ <style>
748
+ @page {{ size: A4; margin: 2cm; }}
749
+ body {{
750
+ font-family: Arial, 'Times New Roman', sans-serif;
751
+ font-size: 12pt;
752
+ {'direction: rtl; text-align: right;' if has_arabic else 'direction: ltr; text-align: left;'}
753
+ }}
754
+ </style>
755
+ </head>
756
+ <body>
757
+ <div>
758
+ {content.replace('\n', '<br>')}
759
+ </div>
760
+ </body>
761
+ </html>"""
762
+ temp_html.write(html_content)
763
+ html_path = temp_html.name
764
 
765
+ try:
766
+ # Create a new PDF document from the HTML
767
+ doc = fitz.open()
768
+ pdf_bytes = BytesIO()
769
+
770
+ try:
771
+ # Try converting HTML to PDF using PyMuPDF
772
+ html_doc = fitz.open("html", html_content)
773
+ doc.insert_pdf(html_doc)
774
+ except Exception as html_err:
775
+ print(f"HTML conversion failed: {html_err}")
776
+
777
+ # Fallback: Create a simple PDF with basic text
778
+ page = doc.new_page()
779
+
780
+ # For right-to-left text like Arabic
781
+ if has_arabic:
782
+ # Add text as blocks from right to left
783
+ blocks = content.split("\n")
784
+ y_pos = 50
785
+ for block in blocks:
786
+ if block.strip():
787
+ rect = fitz.Rect(50, y_pos, page.rect.width - 50, y_pos + 20)
788
+ page.insert_text(rect.tl, block, fontsize=11,
789
+ fontname="helv", right_to_left=True)
790
+ y_pos += 20
791
+ else:
792
+ # For left-to-right text
793
+ page.insert_text((50, 50), content, fontsize=11)
794
+
795
+ # Save the PDF
796
+ doc.save(pdf_bytes)
797
+ doc.close()
798
+
799
+ # Clean up the temporary HTML file
800
+ try:
801
+ os.unlink(html_path)
802
+ except:
803
+ pass
804
+
805
+ # Return as attachment
806
+ from fastapi.responses import Response
807
+ return Response(
808
+ content=pdf_bytes.getvalue(),
809
+ media_type="application/pdf",
810
+ headers={"Content-Disposition": f"attachment; filename={filename}"}
811
+ )
812
+ except Exception as pdf_err:
813
+ print(f"PDF generation error: {pdf_err}")
814
+ traceback.print_exc()
815
+
816
+ # If PDF generation failed, try an even simpler approach
817
+ # Just create a plain text PDF
818
+ try:
819
+ doc = fitz.open()
820
+ page = doc.new_page()
821
+ font = "helv" # Built-in font with reasonable Unicode support
822
+ page.insert_text((50, 50), content, fontname=font, fontsize=11)
823
+
824
+ pdf_bytes = BytesIO()
825
+ doc.save(pdf_bytes)
826
+ doc.close()
827
+
828
+ return Response(
829
+ content=pdf_bytes.getvalue(),
830
+ media_type="application/pdf",
831
+ headers={"Content-Disposition": f"attachment; filename={filename}"}
832
+ )
833
+ except:
834
+ # If all PDF approaches fail, fall back to plain text
835
+ return Response(
836
+ content=content.encode('utf-8'),
837
+ media_type="text/plain; charset=utf-8",
838
+ headers={
839
+ "Content-Disposition": f"attachment; filename={filename.replace('.pdf', '.txt')}",
840
+ "Content-Type": "text/plain; charset=utf-8"
841
+ }
842
+ )
843
  except ImportError:
844
  return JSONResponse(
845
  status_code=501,
846
  content={"success": False, "error": "PDF creation requires PyMuPDF library"}
847
  )
848
+ except Exception as e:
849
+ print(f"PDF creation error: {str(e)}")
850
+ traceback.print_exc()
851
+ return JSONResponse(
852
+ status_code=500,
853
+ content={"success": False, "error": f"PDF creation failed: {str(e)}"}
854
+ )
855
 
856
  elif filename.endswith('.docx'):
857
  # Create DOCX file with proper encoding for Arabic
 
864
 
865
  # Add a paragraph with the translated content
866
  p = doc.add_paragraph()
867
+ # Set paragraph direction to right-to-left for Arabic if needed
868
+ is_arabic = any('\u0600' <= c <= '\u06FF' for c in content)
869
+ if is_arabic:
870
+ try:
871
+ p._element.get_or_add_pPr().set('bidi', True) # Set RTL direction
872
+ except:
873
+ pass # If this fails, continue with default direction
874
  p.add_run(content)
875
 
876
  # Save to bytes
 
891
  content={"success": False, "error": "DOCX creation requires python-docx library"}
892
  )
893
  except Exception as e:
894
+ print(f"DOCX creation error: {str(e)}")
 
895
  traceback.print_exc()
896
  return JSONResponse(
897
+ status_code=500,
898
  content={"success": False, "error": f"DOCX creation error: {str(e)}"}
899
  )
900