prthm11 committed on
Commit
ca5d911
·
verified ·
1 Parent(s): ec5ee60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -22
app.py CHANGED
@@ -654,47 +654,124 @@ def extract_json_from_llm_response(raw_response: str) -> dict:
654
  logger.error("Sanitized JSON still invalid:\n%s", json_string)
655
  raise
656
 
657
- def clean_base64_for_model(raw_b64):
658
- """
659
- Normalize input into a valid data:image/png;base64,<payload> string.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
660
 
661
- Accepts:
662
- - a list of base64 strings → picks the first element
663
- - a PIL Image instance → encodes to PNG/base64
664
- - a raw base64 string → strips whitespace and data URI prefix
665
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
666
  if not raw_b64:
667
- return ""
668
 
669
- # 1. If it’s a list, take its first element
670
  if isinstance(raw_b64, list):
671
  raw_b64 = raw_b64[0] if raw_b64 else ""
672
  if not raw_b64:
673
- return ""
674
 
675
- # 2. If it’s a PIL Image, convert to base64
676
  if isinstance(raw_b64, Image.Image):
677
  buf = io.BytesIO()
678
  raw_b64.save(buf, format="PNG")
679
  raw_b64 = base64.b64encode(buf.getvalue()).decode()
680
 
681
- # 3. At this point it must be a string
682
  if not isinstance(raw_b64, str):
683
  raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
684
 
685
- # 4. Strip any existing data URI prefix, whitespace, or newlines
686
  clean_b64 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", raw_b64)
687
  clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()
688
 
689
- # 5. Validate it’s proper base64
690
- try:
691
- base64.b64decode(clean_b64)
692
- except Exception as e:
693
- logger.error(f"Invalid Base64 passed to model: {e}")
694
- raise
695
-
696
- # 6. Return with the correct data URI prefix
697
- return f"data:image/png;base64,{clean_b64}"
 
 
 
 
698
 
699
  def format_scratch_pseudo_code(code_string):
700
  """
 
654
  logger.error("Sanitized JSON still invalid:\n%s", json_string)
655
  raise
656
 
657
+ # def clean_base64_for_model(raw_b64):
658
+ # """
659
+ # Normalize input into a valid data:image/png;base64,<payload> string.
660
+
661
+ # Accepts:
662
+ # - a list of base64 strings → picks the first element
663
+ # - a PIL Image instance → encodes to PNG/base64
664
+ # - a raw base64 string → strips whitespace and data URI prefix
665
+ # """
666
+ # if not raw_b64:
667
+ # return ""
668
+
669
+ # # 1. If it’s a list, take its first element
670
+ # if isinstance(raw_b64, list):
671
+ # raw_b64 = raw_b64[0] if raw_b64 else ""
672
+ # if not raw_b64:
673
+ # return ""
674
+
675
+ # # 2. If it’s a PIL Image, convert to base64
676
+ # if isinstance(raw_b64, Image.Image):
677
+ # buf = io.BytesIO()
678
+ # raw_b64.save(buf, format="PNG")
679
+ # raw_b64 = base64.b64encode(buf.getvalue()).decode()
680
+
681
+ # # 3. At this point it must be a string
682
+ # if not isinstance(raw_b64, str):
683
+ # raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
684
+
685
+ # # 4. Strip any existing data URI prefix, whitespace, or newlines
686
+ # clean_b64 = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", raw_b64)
687
+ # clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()
688
+
689
+ # # 5. Validate it’s proper base64
690
+ # try:
691
+ # base64.b64decode(clean_b64)
692
+ # except Exception as e:
693
+ # logger.error(f"Invalid Base64 passed to model: {e}")
694
+ # raise
695
+
696
+ # # 6. Return with the correct data URI prefix
697
+ # return f"data:image/png;base64,{clean_b64}"
698
 
699
+ # Re-encode a base64 image as JPEG so that it fits within a size limit (max_kb)
700
def reduce_image_size_to_limit(clean_b64_str, max_kb=4000):
    """Re-encode a base64 image as a JPEG no larger than ``max_kb`` kilobytes.

    The highest JPEG quality in the range 20-95 whose encoded output fits
    within ``max_kb`` is found by binary search.  If even the lowest quality
    is too large, the image is scaled down to 75% of its dimensions and the
    search is repeated until a fitting encoding is found.

    ``max_kb`` bounds the size of the binary JPEG data, not the length of the
    returned base64 text (base64 is roughly one third larger).

    Args:
        clean_b64_str: Base64 image payload, with or without a leading
            ``data:image/<type>;base64,`` prefix.
        max_kb: Upper bound for the encoded JPEG, in units of 1024 bytes.

    Returns:
        A ``data:image/jpeg;base64,<payload>`` string.

    Raises:
        ValueError: If the payload is not valid base64 (``binascii.Error`` is
            a ``ValueError`` subclass), or if no JPEG within ``max_kb`` can be
            produced even from a 1x1 image.

    Errors raised by Pillow while opening the decoded data propagate unchanged.
    """
    import base64
    import re
    from io import BytesIO

    # Remove the data URI prefix and decode first, so that malformed input is
    # rejected before any image work starts.
    base64_data = re.sub(r"^data:image\/[a-zA-Z]+;base64,", "", clean_b64_str)
    image_data = base64.b64decode(base64_data)

    from PIL import Image

    img = Image.open(BytesIO(image_data))

    # JPEG cannot store alpha or palette data; saving e.g. an RGBA or P image
    # directly raises an error, so convert anything the encoder does not accept.
    if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        img = img.convert("RGB")

    while True:
        low, high = 20, 95  # reasonable JPEG quality range
        best_bytes = None

        while low <= high:
            mid = (low + high) // 2
            buffer = BytesIO()
            img.save(buffer, format="JPEG", quality=mid)
            encoded = buffer.getvalue()

            if len(encoded) / 1024 <= max_kb:
                # This quality fits; remember it and try a higher one.
                best_bytes = encoded
                low = mid + 1
            else:
                # Too big; try a lower quality.
                high = mid - 1

        if best_bytes is not None:
            # Base64-encode once, for the winning candidate only.
            payload = base64.b64encode(best_bytes).decode("utf-8")
            return f"data:image/jpeg;base64,{payload}"

        # Nothing fits even at the lowest quality: shrink the image and retry
        # instead of returning a bogus "...base64,None" string.
        width, height = img.size
        if width <= 1 and height <= 1:
            raise ValueError(
                f"Cannot encode image as JPEG within {max_kb} KB"
            )
        img = img.resize(
            (max(1, int(width * 0.75)), max(1, int(height * 0.75)))
        )
736
+
737
+ # Normalize an image input (list, PIL Image, or base64 string) into a base64 data URI for the model
738
def clean_base64_for_model(raw_b64):
    """Normalize an image input into a ``data:image/jpeg;base64,<payload>`` string.

    Accepts:
      - a list            -> only its first element is used
      - a PIL Image       -> encoded to PNG, then to base64
      - a base64 string   -> data URI prefix, newlines and surrounding
                             whitespace are removed

    Payloads longer than 4,000,000 base64 bytes are re-encoded as a smaller
    JPEG through ``reduce_image_size_to_limit``.

    Args:
        raw_b64: The image as described above; may be empty or ``None``.

    Returns:
        The data URI string, or ``""`` when the input is empty.

    Raises:
        TypeError: If the input is neither a string nor a PIL Image.
    """
    import base64
    import io
    import re

    # Always return a plain (falsy) empty string for empty input so that the
    # return type is the same on every path.
    if not raw_b64:
        return ""

    if isinstance(raw_b64, list):
        raw_b64 = raw_b64[0]  # the list is non-empty here
        if not raw_b64:
            return ""

    if not isinstance(raw_b64, str):
        # Pillow is only needed for non-string inputs, so import it lazily.
        from PIL import Image

        if not isinstance(raw_b64, Image.Image):
            raise TypeError(f"Expected base64 string or PIL Image, got {type(raw_b64)}")
        buf = io.BytesIO()
        raw_b64.save(buf, format="PNG")
        raw_b64 = base64.b64encode(buf.getvalue()).decode()

    prefix_pattern = r"^data:image\/[a-zA-Z]+;base64,"

    # Remove data URI prefix if present, plus line breaks and outer whitespace.
    clean_b64 = re.sub(prefix_pattern, "", raw_b64)
    clean_b64 = clean_b64.replace("\n", "").replace("\r", "").strip()

    max_b64_bytes = 4000000
    # Base64 turns every 3 binary bytes into 4 characters, so the binary JPEG
    # must stay within 3/4 of the base64 budget for the encoded result to fit.
    max_binary_kb = (max_b64_bytes * 3 // 4) // 1024

    # Log original size
    original_size = len(clean_b64.encode("utf-8"))
    print(f"Original Base64 size (bytes): {original_size}")

    if original_size > max_b64_bytes:
        reduced = reduce_image_size_to_limit(clean_b64, max_kb=max_binary_kb)
        # The helper returns a full data URI; strip its prefix so the prefix
        # is not emitted twice in the result below.
        reduced_b64 = re.sub(prefix_pattern, "", reduced)
        reduced_b64 = reduced_b64.replace("\n", "").replace("\r", "").strip()
        reduced_size = len(reduced_b64.encode("utf-8"))
        print(f"Reduced Base64 size (bytes): {reduced_size}")
        return f"data:image/jpeg;base64,{reduced_b64}"

    # NOTE(review): the payload is labelled image/jpeg even when it was not
    # re-encoded (e.g. the PNG produced from a PIL Image above) - confirm the
    # consumer tolerates a mismatched MIME label.
    return f"data:image/jpeg;base64,{clean_b64}"
775
 
776
  def format_scratch_pseudo_code(code_string):
777
  """