Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -654,47 +654,124 @@ def extract_json_from_llm_response(raw_response: str) -> dict:
|
|
654 |
logger.error("Sanitized JSON still invalid:\n%s", json_string)
|
655 |
raise
|
656 |
|
657 |
-
def clean_base64_for_model(raw_b64):
|
658 |
-
|
659 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
660 |
|
661 |
-
|
662 |
-
|
663 |
-
- a PIL Image instance → encodes to PNG/base64
|
664 |
-
- a raw base64 string → strips whitespace and data URI prefix
|
665 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
666 |
if not raw_b64:
|
667 |
-
return ""
|
668 |
|
669 |
-
# 1. If it’s a list, take its first element
|
670 |
if isinstance(raw_b64, list):
|
671 |
raw_b64 = raw_b64[0] if raw_b64 else ""
|
672 |
if not raw_b64:
|
673 |
-
return ""
|
674 |
|
675 |
-
# 2. If it’s a PIL Image, convert to base64
|
676 |
if isinstance(raw_b64, Image.Image):
|
677 |
buf = io.BytesIO()
|
678 |
raw_b64.save(buf, format="PNG")
|
679 |
raw_b64 = base64.b64encode(buf.getvalue()).decode()
|
680 |
|
681 |
-
# 3. At this point it must be a string
|
682 |
if not isinstance(raw_b64, str):
|
683 |
raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
|
684 |
|
685 |
-
#
|
686 |
clean_b64 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", raw_b64)
|
687 |
clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()
|
688 |
|
689 |
-
#
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
|
|
|
|
|
|
|
|
698 |
|
699 |
def format_scratch_pseudo_code(code_string):
|
700 |
"""
|
|
|
654 |
logger.error("Sanitized JSON still invalid:\n%s", json_string)
|
655 |
raise
|
656 |
|
657 |
+
# def clean_base64_for_model(raw_b64):
|
658 |
+
# """
|
659 |
+
# Normalize input into a valid data:image/png;base64,<payload> string.
|
660 |
+
|
661 |
+
# Accepts:
|
662 |
+
# - a list of base64 strings → picks the first element
|
663 |
+
# - a PIL Image instance → encodes to PNG/base64
|
664 |
+
# - a raw base64 string → strips whitespace and data URI prefix
|
665 |
+
# """
|
666 |
+
# if not raw_b64:
|
667 |
+
# return ""
|
668 |
+
|
669 |
+
# # 1. If it’s a list, take its first element
|
670 |
+
# if isinstance(raw_b64, list):
|
671 |
+
# raw_b64 = raw_b64[0] if raw_b64 else ""
|
672 |
+
# if not raw_b64:
|
673 |
+
# return ""
|
674 |
+
|
675 |
+
# # 2. If it’s a PIL Image, convert to base64
|
676 |
+
# if isinstance(raw_b64, Image.Image):
|
677 |
+
# buf = io.BytesIO()
|
678 |
+
# raw_b64.save(buf, format="PNG")
|
679 |
+
# raw_b64 = base64.b64encode(buf.getvalue()).decode()
|
680 |
+
|
681 |
+
# # 3. At this point it must be a string
|
682 |
+
# if not isinstance(raw_b64, str):
|
683 |
+
# raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
|
684 |
+
|
685 |
+
# # 4. Strip any existing data URI prefix, whitespace, or newlines
|
686 |
+
# clean_b64 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", raw_b64)
|
687 |
+
# clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()
|
688 |
+
|
689 |
+
# # 5. Validate it’s proper base64
|
690 |
+
# try:
|
691 |
+
# base64.b64decode(clean_b64)
|
692 |
+
# except Exception as e:
|
693 |
+
# logger.error(f"Invalid Base64 passed to model: {e}")
|
694 |
+
# raise
|
695 |
+
|
696 |
+
# # 6. Return with the correct data URI prefix
|
697 |
+
# return f"data:image/png;base64,{clean_b64}"
|
698 |
|
699 |
+
# Reduce the size of a base64-encoded image when it exceeds the allowed limit.
|
700 |
+
def reduce_image_size_to_limit(clean_b64_str, max_kb=4000):
    """
    Re-encode an image as JPEG so that the encoded file fits within max_kb.

    A binary search over JPEG quality (20-95) picks the highest quality whose
    encoded size does not exceed the limit. If even the lowest quality is too
    large, the image is downscaled step by step (at the lowest quality) until
    it fits.

    Args:
        clean_b64_str: Base64 image payload, with or without a leading
            ``data:image/<type>;base64,`` prefix.
        max_kb: Upper bound for the encoded JPEG size, in KiB (1024 bytes).
            The bound applies to the binary JPEG, not to its base64 text,
            which is roughly 4/3 larger.

    Returns:
        A ``data:image/jpeg;base64,<payload>`` string.

    Raises:
        binascii.Error: If the payload is not valid base64.
        PIL.UnidentifiedImageError: If the decoded bytes are not an image.
        ValueError: If the image cannot be made to fit even at 1x1 pixels.
    """
    import base64
    import re
    from io import BytesIO

    from PIL import Image

    def encode_jpeg(image, quality):
        # Serialize the image to JPEG bytes at the given quality.
        out = BytesIO()
        image.save(out, format="JPEG", quality=quality)
        return out.getvalue()

    # Remove the data URI prefix, if any, and decode the payload.
    payload = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", clean_b64_str)
    img = Image.open(BytesIO(base64.b64decode(payload)))

    # JPEG cannot store alpha or palette modes (e.g. RGBA/P from PNG input);
    # saving them directly raises OSError, so normalize to RGB first.
    if img.mode not in ("1", "L", "RGB", "CMYK", "YCbCr"):
        img = img.convert("RGB")

    max_bytes = max_kb * 1024
    min_quality, max_quality = 20, 95  # reasonable JPEG quality range
    best_jpeg = None

    low, high = min_quality, max_quality
    while low <= high:
        mid = (low + high) // 2
        candidate = encode_jpeg(img, mid)
        if len(candidate) <= max_bytes:
            # This quality fits; remember it and try a higher one.
            best_jpeg = candidate
            low = mid + 1
        else:
            # Too big; try a lower quality.
            high = mid - 1

    # No quality setting was small enough: shrink the pixel dimensions
    # instead of returning an unusable result.
    while best_jpeg is None:
        width, height = img.size
        if width == 1 and height == 1:
            raise ValueError(
                f"Image cannot be reduced to fit within {max_kb} KB"
            )
        img = img.resize(
            (max(1, int(width * 0.75)), max(1, int(height * 0.75)))
        )
        candidate = encode_jpeg(img, min_quality)
        if len(candidate) <= max_bytes:
            best_jpeg = candidate

    encoded = base64.b64encode(best_jpeg).decode("utf-8")
    return f"data:image/jpeg;base64,{encoded}"
|
736 |
+
|
737 |
+
# Clean and normalize the base64 image before it is sent to the model.
|
738 |
+
def clean_base64_for_model(raw_b64):
    """
    Normalize an image input into a ``data:image/jpeg;base64,<payload>`` string.

    Accepts:
      - a list            -> only its first element is used
      - a PIL Image       -> encoded to PNG, then base64
      - a base64 string   -> data URI prefix, newlines and surrounding
                             whitespace are stripped

    If the cleaned base64 text is longer than 4,000,000 bytes, the image is
    re-encoded through ``reduce_image_size_to_limit`` so that the returned
    base64 text stays under that same limit.

    Args:
        raw_b64: A base64 string, a PIL Image, or a list whose first element
            is one of those.

    Returns:
        The data URI string, or ``""`` when the input is empty.

    Raises:
        TypeError: If the input is neither a string nor a PIL Image.
    """
    import base64
    import io
    import re

    data_uri_prefix = r"^data:image\/[a-zA-Z]+;base64,"
    max_b64_bytes = 4000000

    # Empty input yields an empty string so every path returns a str.
    if not raw_b64:
        return ""

    if isinstance(raw_b64, list):
        raw_b64 = raw_b64[0] if raw_b64 else ""
        if not raw_b64:
            return ""

    if not isinstance(raw_b64, str):
        # PIL is imported lazily so plain-string inputs do not depend on it.
        from PIL import Image

        if not isinstance(raw_b64, Image.Image):
            raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
        buf = io.BytesIO()
        raw_b64.save(buf, format="PNG")
        raw_b64 = base64.b64encode(buf.getvalue()).decode()

    # Remove data URI prefix if present, then newlines and outer whitespace.
    clean_b64 = re.sub(data_uri_prefix, "", raw_b64)
    clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()

    # Log original size
    original_size = len(clean_b64.encode("utf-8"))
    print(f"Original Base64 size (bytes): {original_size}")

    if original_size > max_b64_bytes:
        # The limit is checked on base64 text, which is 4/3 the size of the
        # binary image, so the binary budget handed to the reducer is 3/4 of
        # the limit (expressed in KiB).
        max_binary_kb = max_b64_bytes * 3 // 4 // 1024
        reduced_uri = reduce_image_size_to_limit(clean_b64, max_kb=max_binary_kb)

        # The reducer returns a full data URI; strip its prefix so it is not
        # duplicated when the final URI is built below.
        reduced_b64 = re.sub(data_uri_prefix, "", reduced_uri)
        reduced_b64 = reduced_b64.replace("\n", "").replace("\r", "").strip()
        reduced_size = len(reduced_b64.encode("utf-8"))
        print(f"Reduced Base64 size (bytes): {reduced_size}")
        return f"data:image/jpeg;base64,{reduced_b64}"

    # NOTE(review): the payload is labelled image/jpeg even when it is PNG
    # (e.g. the PIL path above) - confirm the consumer ignores the MIME type.
    return f"data:image/jpeg;base64,{clean_b64}"
|
775 |
|
776 |
def format_scratch_pseudo_code(code_string):
|
777 |
"""
|