amine_dubs committed on
Commit
af0b13b
·
1 Parent(s): 140ca27
Files changed (1) hide show
  1. backend/main.py +52 -110
backend/main.py CHANGED
@@ -698,6 +698,9 @@ async def translate_document_endpoint(
698
  @app.post("/download/translated-document")
699
  async def download_translated_document(request: Request):
700
  """Creates and returns a downloadable version of the translated document."""
 
 
 
701
  try:
702
  # Parse request body
703
  data = await request.json()
@@ -714,7 +717,6 @@ async def download_translated_document(request: Request):
714
  # Handle different file types
715
  if filename.endswith('.txt'):
716
  # Simple text file with UTF-8 encoding
717
- from fastapi.responses import Response
718
  return Response(
719
  content=content.encode('utf-8'),
720
  media_type="text/plain; charset=utf-8",
@@ -731,115 +733,52 @@ async def download_translated_document(request: Request):
731
  import tempfile
732
  import os
733
 
 
 
 
 
734
  # Check if text contains Arabic
735
  has_arabic = any('\u0600' <= c <= '\u06FF' for c in content)
736
 
737
- # For Arabic PDFs, we'll try a special approach:
738
- # 1. Create a temporary HTML file with the Arabic text
739
- # 2. Use PyMuPDF to convert the HTML to PDF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
740
 
741
- # Create a temporary HTML file with proper RTL styling
742
- with tempfile.NamedTemporaryFile(delete=False, suffix='.html', mode='w', encoding='utf-8') as temp_html:
743
- html_content = f"""<!DOCTYPE html>
744
- <html>
745
- <head>
746
- <meta charset="UTF-8">
747
- <style>
748
- @page {{ size: A4; margin: 2cm; }}
749
- body {{
750
- font-family: Arial, 'Times New Roman', sans-serif;
751
- font-size: 12pt;
752
- {'direction: rtl; text-align: right;' if has_arabic else 'direction: ltr; text-align: left;'}
753
- }}
754
- </style>
755
- </head>
756
- <body>
757
- <div>
758
- {content.replace('\n', '<br>')}
759
- </div>
760
- </body>
761
- </html>"""
762
- temp_html.write(html_content)
763
- html_path = temp_html.name
764
 
765
- try:
766
- # Create a new PDF document from the HTML
767
- doc = fitz.open()
768
- pdf_bytes = BytesIO()
769
-
770
- try:
771
- # Try converting HTML to PDF using PyMuPDF
772
- html_doc = fitz.open("html", html_content)
773
- doc.insert_pdf(html_doc)
774
- except Exception as html_err:
775
- print(f"HTML conversion failed: {html_err}")
776
-
777
- # Fallback: Create a simple PDF with basic text
778
- page = doc.new_page()
779
-
780
- # For right-to-left text like Arabic
781
- if has_arabic:
782
- # Add text as blocks from right to left
783
- blocks = content.split("\n")
784
- y_pos = 50
785
- for block in blocks:
786
- if block.strip():
787
- rect = fitz.Rect(50, y_pos, page.rect.width - 50, y_pos + 20)
788
- page.insert_text(rect.tl, block, fontsize=11,
789
- fontname="helv", right_to_left=True)
790
- y_pos += 20
791
- else:
792
- # For left-to-right text
793
- page.insert_text((50, 50), content, fontsize=11)
794
-
795
- # Save the PDF
796
- doc.save(pdf_bytes)
797
- doc.close()
798
-
799
- # Clean up the temporary HTML file
800
- try:
801
- os.unlink(html_path)
802
- except:
803
- pass
804
-
805
- # Return as attachment
806
- from fastapi.responses import Response
807
- return Response(
808
- content=pdf_bytes.getvalue(),
809
- media_type="application/pdf",
810
- headers={"Content-Disposition": f"attachment; filename={filename}"}
811
- )
812
- except Exception as pdf_err:
813
- print(f"PDF generation error: {pdf_err}")
814
- traceback.print_exc()
815
-
816
- # If PDF generation failed, try an even simpler approach
817
- # Just create a plain text PDF
818
- try:
819
- doc = fitz.open()
820
- page = doc.new_page()
821
- font = "helv" # Built-in font with reasonable Unicode support
822
- page.insert_text((50, 50), content, fontname=font, fontsize=11)
823
-
824
- pdf_bytes = BytesIO()
825
- doc.save(pdf_bytes)
826
- doc.close()
827
-
828
- return Response(
829
- content=pdf_bytes.getvalue(),
830
- media_type="application/pdf",
831
- headers={"Content-Disposition": f"attachment; filename={filename}"}
832
- )
833
- except:
834
- # If all PDF approaches fail, fall back to plain text
835
- return Response(
836
- content=content.encode('utf-8'),
837
- media_type="text/plain; charset=utf-8",
838
- headers={
839
- "Content-Disposition": f"attachment; filename={filename.replace('.pdf', '.txt')}",
840
- "Content-Type": "text/plain; charset=utf-8"
841
- }
842
- )
843
  except ImportError:
844
  return JSONResponse(
845
  status_code=501,
@@ -848,9 +787,14 @@ async def download_translated_document(request: Request):
848
  except Exception as e:
849
  print(f"PDF creation error: {str(e)}")
850
  traceback.print_exc()
851
- return JSONResponse(
852
- status_code=500,
853
- content={"success": False, "error": f"PDF creation failed: {str(e)}"}
 
 
 
 
 
854
  )
855
 
856
  elif filename.endswith('.docx'):
@@ -879,7 +823,6 @@ async def download_translated_document(request: Request):
879
  docx_bytes.seek(0)
880
 
881
  # Return as attachment with proper encoding
882
- from fastapi.responses import Response
883
  return Response(
884
  content=docx_bytes.getvalue(),
885
  media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -900,7 +843,6 @@ async def download_translated_document(request: Request):
900
 
901
  else:
902
  # Fallback to text file
903
- from fastapi.responses import Response
904
  return Response(
905
  content=content.encode('utf-8'),
906
  media_type="text/plain; charset=utf-8",
 
698
  @app.post("/download/translated-document")
699
  async def download_translated_document(request: Request):
700
  """Creates and returns a downloadable version of the translated document."""
701
+ # Import Response at the function start to ensure it's in scope for all code paths
702
+ from fastapi.responses import Response
703
+
704
  try:
705
  # Parse request body
706
  data = await request.json()
 
717
  # Handle different file types
718
  if filename.endswith('.txt'):
719
  # Simple text file with UTF-8 encoding
 
720
  return Response(
721
  content=content.encode('utf-8'),
722
  media_type="text/plain; charset=utf-8",
 
733
  import tempfile
734
  import os
735
 
736
+ # Create a new PDF document
737
+ doc = fitz.open()
738
+ page = doc.new_page()
739
+
740
  # Check if text contains Arabic
741
  has_arabic = any('\u0600' <= c <= '\u06FF' for c in content)
742
 
743
+ # Simple PDF creation approach - works with most PyMuPDF versions
744
+ try:
745
+ # For right-to-left text like Arabic
746
+ if has_arabic:
747
+ # Add text as blocks with appropriate alignment
748
+ blocks = content.split("\n")
749
+ y_pos = 50
750
+ for block in blocks:
751
+ if block.strip():
752
+ # Create text with proper alignment for Arabic
753
+ # Note: removed the right_to_left parameter as it's not supported
754
+ rect = fitz.Rect(50, y_pos, page.rect.width - 50, y_pos + 20)
755
+ # For Arabic, align text to the right side of the rectangle
756
+ if has_arabic:
757
+ page.insert_text(rect.tr, block, fontsize=11, fontname="helv")
758
+ else:
759
+ page.insert_text(rect.tl, block, fontsize=11, fontname="helv")
760
+ y_pos += 20
761
+ else:
762
+ # For left-to-right text
763
+ page.insert_text((50, 50), content, fontsize=11, fontname="helv")
764
 
765
+ except Exception as e:
766
+ print(f"Error inserting text: {e}")
767
+ # Most basic approach if all else fails
768
+ page.insert_text((50, 50), content, fontsize=11)
769
+
770
+ # Save the PDF
771
+ pdf_bytes = BytesIO()
772
+ doc.save(pdf_bytes)
773
+ doc.close()
774
+
775
+ # Return as attachment
776
+ return Response(
777
+ content=pdf_bytes.getvalue(),
778
+ media_type="application/pdf",
779
+ headers={"Content-Disposition": f"attachment; filename={filename}"}
780
+ )
 
 
 
 
 
 
 
781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  except ImportError:
783
  return JSONResponse(
784
  status_code=501,
 
787
  except Exception as e:
788
  print(f"PDF creation error: {str(e)}")
789
  traceback.print_exc()
790
+ # Return a text file instead
791
+ return Response(
792
+ content=content.encode('utf-8'),
793
+ media_type="text/plain; charset=utf-8",
794
+ headers={
795
+ "Content-Disposition": f"attachment; filename={filename.replace('.pdf', '.txt')}",
796
+ "Content-Type": "text/plain; charset=utf-8"
797
+ }
798
  )
799
 
800
  elif filename.endswith('.docx'):
 
823
  docx_bytes.seek(0)
824
 
825
  # Return as attachment with proper encoding
 
826
  return Response(
827
  content=docx_bytes.getvalue(),
828
  media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
 
843
 
844
  else:
845
  # Fallback to text file
 
846
  return Response(
847
  content=content.encode('utf-8'),
848
  media_type="text/plain; charset=utf-8",