LahiruD95 committed
Commit 038bb8a · Parent(s): 4e247c2

change routes.py

Files changed (1):
  1. app/routes.py +34 -21
app/routes.py CHANGED
@@ -3,6 +3,7 @@ from werkzeug.utils import secure_filename
 import os
 import easyocr
 import pytesseract  # Ensure this is imported
+import base64
 from huggingface_hub import InferenceApi
 
 from PIL import Image
@@ -11,6 +12,10 @@ from app.config import Config
 from app.models import audio_model, sentiment_pipeline, emotion_pipeline, client
 from app.services import extract_tasks
 from app.utils import generate_tags, error_response
+from transformers import pipeline
+from PIL import Image
+from werkzeug.utils import secure_filename
+
 
 # Initialize Flask Blueprint
 bp = Blueprint('main', __name__)
@@ -26,6 +31,10 @@ MIN_SENTIMENT_CONFIDENCE = 0.4  # Below this becomes "neutral"
 # =============================
 # 🔹 API Routes
 # =============================
+ocr_pipe = pipeline(
+    "image-to-text",
+    model="microsoft/trocr-base-handwritten"  # or "microsoft/trocr-base-printed"
+)
 
 @bp.route('/transcribe', methods=['POST'])
 def transcribe():
@@ -88,29 +97,33 @@ def analyze_image():
     if 'file' not in request.files:
         return error_response("No image file provided", 400)
 
-    f = request.files['file']
-    path = os.path.join("/tmp", secure_filename(f.filename))
-    f.save(path)
-
-    # read raw bytes
-    with open(path, "rb") as img_f:
-        img_bytes = img_f.read()
+    file = request.files["file"]
+    path = "/tmp/" + secure_filename(file.filename)
+    file.save(path)
 
+    # # read raw bytes and base64-encode for JSON serialization
+    # with open(path, "rb") as img_f:
+    #     raw_bytes = img_f.read()
+    # b64_str = base64.b64encode(raw_bytes).decode("utf-8")
+    #
     try:
-        # 1) Ask the vision-LLM to describe / extract text
-        completion = client.chat.completions.create(
-            model="google/gemma-3-27b-it",
-            messages=[{
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Extract any text you see in this image."},
-                    {"type": "image_bytes", "image_bytes": {"data": img_bytes}}
-                ]
-            }],
-            max_tokens=512,
-        )
-
-        extracted = completion.choices[0].message.content.strip();
+        # # 1) Ask the vision-LLM to extract text, passing base64 string
+        # completion = client.chat.completions.create(
+        #     model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
+        #     messages=[{
+        #         "role": "user",
+        #         "content": [
+        #             {"type": "text", "text": "Extract any text you see in this image."},
+        #             {"type": "image_bytes", "image_bytes": {"data": b64_str}}
+        #         ]
+        #     }],
+        #     max_tokens=512,
+        # )
+        img = Image.open(path).convert("RGB")
+
+        extracted = ocr_pipe(img)
+
+        print(extracted)
 
         analysis = analyze_text_internal(extracted)
         tags = generate_tags(extracted)
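
Note on the new OCR path: a transformers "image-to-text" pipeline returns a list of generations, typically [{"generated_text": "..."}], rather than a plain string, so as written `extracted` is a list when it reaches analyze_text_internal() and generate_tags(). A minimal sketch of the unpacking the route likely still needs; the image path is hypothetical, everything else mirrors the diff:

    from PIL import Image
    from transformers import pipeline

    ocr_pipe = pipeline(
        "image-to-text",
        model="microsoft/trocr-base-handwritten"  # or "microsoft/trocr-base-printed"
    )

    # Hypothetical input path; the route saves uploads under /tmp
    img = Image.open("/tmp/sample.png").convert("RGB")

    # The pipeline returns a list of dicts, e.g. [{"generated_text": "..."}]
    results = ocr_pipe(img)
    extracted = results[0]["generated_text"].strip() if results else ""

    print(extracted)  # now a plain string, safe to hand to text analysis

Worth noting as well: the TrOCR checkpoints are trained on single lines of text, so multi-line images generally need line segmentation before they OCR well.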
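
On the commented-out vision-LLM attempt: the OpenAI-style chat payload that client.chat.completions.create() accepts has no "image_bytes" content type, which is the likely reason that path was abandoned. The conventional shape for a local image is an "image_url" part carrying a base64 data URL. A sketch under that assumption (the client construction and file path are hypothetical, the model name is the one from the diff, and whether its endpoint accepts data URLs is itself an assumption):

    import base64
    from huggingface_hub import InferenceClient

    client = InferenceClient()  # hypothetical; the app's client comes from app.models

    with open("/tmp/sample.png", "rb") as img_f:  # hypothetical path
        b64_str = base64.b64encode(img_f.read()).decode("utf-8")

    completion = client.chat.completions.create(
        model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract any text you see in this image."},
                # OpenAI-compatible image part: a data URL, not raw bytes
                {"type": "image_url", "image_url": {"url": "data:image/png;base64," + b64_str}},
            ],
        }],
        max_tokens=512,
    )

    extracted = completion.choices[0].message.content.strip()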