Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,6 +37,7 @@ import difflib
|
|
| 37 |
import time
|
| 38 |
import shutil
|
| 39 |
import requests
|
|
|
|
| 40 |
|
| 41 |
import json
|
| 42 |
import markdown
|
|
@@ -370,6 +371,18 @@ def save_to_google_storage(bucket_name, file_path, destination_blob_name, expira
|
|
| 370 |
)
|
| 371 |
print(f"✅ File uploaded to Google Cloud Storage. Signed URL: {signed_url}")
|
| 372 |
return signed_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
# Function to generate and save a document using ReportLab
|
| 374 |
def generate_document(task_description, md_content, user_name='jayw', bucket_name='curify'):
|
| 375 |
logger.debug("Starting to generate document")
|
|
@@ -385,29 +398,39 @@ def generate_document(task_description, md_content, user_name='jayw', bucket_nam
|
|
| 385 |
local_filename = f"{truncated_hash}.pdf" # Use the truncated hash as the local file name
|
| 386 |
c = canvas.Canvas(local_filename, pagesize=letter)
|
| 387 |
|
| 388 |
-
#
|
| 389 |
-
|
|
|
|
|
|
|
| 390 |
try:
|
| 391 |
-
# Register the
|
| 392 |
-
|
| 393 |
-
c.ttfonts.addRegisteredFont('
|
| 394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
except Exception as e:
|
| 396 |
-
logger.error(f"Error loading font
|
| 397 |
-
raise RuntimeError("Failed to load
|
| 398 |
|
| 399 |
# Set initial Y position for drawing text
|
| 400 |
y_position = 750 # Starting position for text
|
| 401 |
|
| 402 |
# Process dictionary and render content
|
| 403 |
for key, value in md_content.items():
|
| 404 |
-
#
|
| 405 |
-
c.setFont(
|
| 406 |
c.drawString(100, y_position, f"# {key}")
|
| 407 |
y_position -= 20
|
| 408 |
|
|
|
|
|
|
|
|
|
|
| 409 |
# Add value
|
| 410 |
-
c.setFont('NotoSansCJK', 12) # Regular font for content
|
| 411 |
if isinstance(value, list): # Handle lists
|
| 412 |
for item in value:
|
| 413 |
c.drawString(100, y_position, f"- {item}")
|
|
@@ -419,7 +442,7 @@ def generate_document(task_description, md_content, user_name='jayw', bucket_nam
|
|
| 419 |
# Check if the page needs to be broken (if Y position is too low)
|
| 420 |
if y_position < 100:
|
| 421 |
c.showPage() # Create a new page
|
| 422 |
-
c.setFont('
|
| 423 |
y_position = 750 # Reset the Y position for the new page
|
| 424 |
|
| 425 |
# Save the PDF
|
|
@@ -432,7 +455,8 @@ def generate_document(task_description, md_content, user_name='jayw', bucket_nam
|
|
| 432 |
public_url = save_to_google_storage(bucket_name, local_filename, destination_blob_name)
|
| 433 |
logger.debug("Finished generating document")
|
| 434 |
return public_url
|
| 435 |
-
|
|
|
|
| 436 |
# In[10]:
|
| 437 |
|
| 438 |
|
|
|
|
| 37 |
import time
|
| 38 |
import shutil
|
| 39 |
import requests
|
| 40 |
+
import re
|
| 41 |
|
| 42 |
import json
|
| 43 |
import markdown
|
|
|
|
| 371 |
)
|
| 372 |
print(f"✅ File uploaded to Google Cloud Storage. Signed URL: {signed_url}")
|
| 373 |
return signed_url
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
# Function to check if content is predominantly Simplified Chinese
def is_simplified(text):
    """Return True when *text* is mostly Han characters written in Simplified Chinese.

    Two conditions must hold:

    1. More than 50% of the characters in ``text`` fall in the CJK Unified
       Ideographs range U+4E00-U+9FFF.
    2. Among those Han characters, the ones that exist only in the
       Simplified repertoire are at least as numerous as the ones that
       exist only in the Traditional repertoire.

    The Han range alone cannot tell the two scripts apart (both use the same
    Unicode block), so the script is inferred from the legacy encodings:
    a character encodable in GB2312 but not Big5 counts as Simplified-only,
    and the reverse counts as Traditional-only. Characters shared by both
    scripts are neutral, so text made only of shared characters is still
    reported as Simplified.

    Args:
        text: The string to classify.

    Returns:
        bool: True for Han-majority text that is not predominantly
        Traditional; False otherwise (including empty or non-Han text).
    """
    def _encodable(char, encoding):
        # Probe a single character against a legacy codec.
        try:
            char.encode(encoding)
        except UnicodeEncodeError:
            return False
        return True

    han_range = re.compile('[\u4e00-\u9fff]')  # Han characters in general
    han_characters = [char for char in text if han_range.match(char)]

    # Threshold of 50% Han characters before the text is treated as Chinese at all
    if len(han_characters) <= len(text) * 0.5:
        return False

    simplified_only = 0
    traditional_only = 0
    for char in han_characters:
        in_gb2312 = _encodable(char, 'gb2312')
        in_big5 = _encodable(char, 'big5')
        if in_gb2312 and not in_big5:
            simplified_only += 1
        elif in_big5 and not in_gb2312:
            traditional_only += 1

    # Ties (e.g. only shared characters) resolve to Simplified
    return simplified_only >= traditional_only
|
| 381 |
+
|
| 382 |
+
# Function to choose the appropriate font for the content
|
| 383 |
+
def choose_font_for_content(content):
    """Pick the font name used to draw *content*.

    Args:
        content: The text that is about to be drawn.

    Returns:
        str: 'NotoSansSC' when ``is_simplified(content)`` is truthy,
        otherwise 'NotoSansTC'.
    """
    if is_simplified(content):
        return 'NotoSansSC'
    return 'NotoSansTC'
|
| 385 |
+
|
| 386 |
# Function to generate and save a document using ReportLab
|
| 387 |
def generate_document(task_description, md_content, user_name='jayw', bucket_name='curify'):
|
| 388 |
logger.debug("Starting to generate document")
|
|
|
|
| 398 |
local_filename = f"{truncated_hash}.pdf" # Use the truncated hash as the local file name
|
| 399 |
c = canvas.Canvas(local_filename, pagesize=letter)
|
| 400 |
|
| 401 |
+
# Paths to the TTF fonts for Simplified and Traditional Chinese
|
| 402 |
+
sc_font_path = 'NotoSansSC-Regular.ttf' # Path to Simplified Chinese font
|
| 403 |
+
tc_font_path = 'NotoSansTC-Regular.ttf' # Path to Traditional Chinese font
|
| 404 |
+
|
| 405 |
try:
|
| 406 |
+
# Register the Simplified Chinese font
|
| 407 |
+
sc_font = TTFont('NotoSansSC', sc_font_path)
|
| 408 |
+
c.ttfonts.addRegisteredFont('NotoSansSC', sc_font)
|
| 409 |
+
|
| 410 |
+
# Register the Traditional Chinese font
|
| 411 |
+
tc_font = TTFont('NotoSansTC', tc_font_path)
|
| 412 |
+
c.ttfonts.addRegisteredFont('NotoSansTC', tc_font)
|
| 413 |
+
|
| 414 |
+
# Set the default font to Simplified Chinese; the font for each section is chosen from its content further below
|
| 415 |
+
c.setFont('NotoSansSC', 12)
|
| 416 |
except Exception as e:
|
| 417 |
+
logger.error(f"Error loading font files: {e}")
|
| 418 |
+
raise RuntimeError("Failed to load one or more fonts. Ensure the font files are accessible.")
|
| 419 |
|
| 420 |
# Set initial Y position for drawing text
|
| 421 |
y_position = 750 # Starting position for text
|
| 422 |
|
| 423 |
# Process dictionary and render content
|
| 424 |
for key, value in md_content.items():
|
| 425 |
+
# Choose the font based on the key (header)
|
| 426 |
+
c.setFont(choose_font_for_content(key), 14)
|
| 427 |
c.drawString(100, y_position, f"# {key}")
|
| 428 |
y_position -= 20
|
| 429 |
|
| 430 |
+
# Choose the font for the value
|
| 431 |
+
c.setFont(choose_font_for_content(str(value)), 12)
|
| 432 |
+
|
| 433 |
# Add value
|
|
|
|
| 434 |
if isinstance(value, list): # Handle lists
|
| 435 |
for item in value:
|
| 436 |
c.drawString(100, y_position, f"- {item}")
|
|
|
|
| 442 |
# Check if the page needs to be broken (if Y position is too low)
|
| 443 |
if y_position < 100:
|
| 444 |
c.showPage() # Create a new page
|
| 445 |
+
c.setFont('NotoSansSC', 12) # Reset font
|
| 446 |
y_position = 750 # Reset the Y position for the new page
|
| 447 |
|
| 448 |
# Save the PDF
|
|
|
|
| 455 |
public_url = save_to_google_storage(bucket_name, local_filename, destination_blob_name)
|
| 456 |
logger.debug("Finished generating document")
|
| 457 |
return public_url
|
| 458 |
+
|
| 459 |
+
|
| 460 |
# In[10]:
|
| 461 |
|
| 462 |
|