Spaces:

LahiruD95
/

MindPalaceAI

Sleeping

File size: 7,799 Bytes

from flask import Blueprint, request, jsonify
from werkzeug.utils import secure_filename
import os
import pytesseract  # Ensure this is imported
import base64
from huggingface_hub import InferenceApi

from PIL import Image

from app.config import Config
from app.models import audio_model, sentiment_pipeline, emotion_pipeline, client
from app.services import extract_tasks
from app.utils import generate_tags, error_response
from transformers import pipeline
from PIL import Image
from werkzeug.utils import secure_filename


# Blueprint on which the routes and error handlers below are registered.
bp = Blueprint('main', __name__)

# ── Analysis thresholds ──────────────────────────────────────────────────────
# NOTE(review): neither constant is referenced in the visible part of this
# file — confirm they are consumed elsewhere or wire them into the analysis.
EMOTION_SCORE_THRESHOLD = 0.15  # Adjust based on your testing
MIN_SENTIMENT_CONFIDENCE = 0.4  # Below this becomes "neutral"

# ── OCR pipeline ─────────────────────────────────────────────────────────────
# Built once, at import time, through transformers.pipeline and used by the
# /analyze_image route. The configured checkpoint is Microsoft's TrOCR
# *handwritten* variant; the printed-text variant is named in the inline
# comment as the alternative.
ocr_pipe = pipeline(
    "image-to-text",
    model="microsoft/trocr-base-handwritten"   # or "microsoft/trocr-base-printed"
)

# =============================
# 🔹 API Routes
# =============================

@bp.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file and analyse the resulting text.

    Expects a multipart upload under the ``file`` field. The upload is written
    to a uniquely named temporary file, passed to ``audio_model.transcribe``,
    and the returned ``"text"`` is run through ``analyze_text_internal`` and
    ``generate_tags``.

    Returns:
        A JSON response with ``transcription``, ``sentiment``, ``emotion``,
        ``confidence`` and ``tags`` on success; otherwise the value of
        ``error_response`` with status 400 (missing file, empty
        transcription) or 500 (any exception while processing).
    """
    import tempfile  # local import keeps this handler self-contained

    if 'file' not in request.files:
        return error_response("No file provided", 400)

    file = request.files['file']

    # Only the sanitised extension of the client-supplied name is reused; the
    # rest of the path is generated so that concurrent uploads sharing a name
    # cannot overwrite each other and an empty sanitised name cannot collapse
    # the path to the temp directory itself.
    suffix = os.path.splitext(secure_filename(file.filename or ""))[1]
    fd, file_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)  # the path is reopened by file.save() below

    try:
        file.save(file_path)

        # Transcribe audio
        result = audio_model.transcribe(file_path)
        transcription = result.get("text", "")

        if not transcription.strip():
            return error_response("Transcription is empty", 400)

        analysis_response = analyze_text_internal(transcription)
        if not isinstance(analysis_response, dict):
            # On failure the helper hands back an error response instead of a
            # result dict; forward it rather than indexing into it.
            return analysis_response

        tags = generate_tags(transcription)

        return jsonify({
            "transcription": transcription,
            "sentiment": analysis_response["sentiment"],
            "emotion": analysis_response["emotion"],
            "confidence": analysis_response["confidence"],
            "tags": tags
        })
    except Exception as e:
        return error_response(str(e), 500)
    finally:
        # Always drop the temporary upload, on success and on failure alike.
        try:
            os.remove(file_path)
        except OSError:
            pass


# @bp.route('/analyze_image', methods=['POST'])
# def analyze_image():
#     if 'file' not in request.files:
#         return error_response("No image file provided", 400)
#
#     file = request.files['file']
#     image_bytes = file.read()
#
#     try:
#         # send raw bytes to HF inference
#         result = ocr_api(image_bytes)
#         # TroCR returns a single string of text
#         extracted = ""
#         if isinstance(result, str):
#             extracted = result
#         elif isinstance(result, dict) and "generated_text" in result:
#             extracted = result["generated_text"]
#         else:
#             # fallback to printing whatever we got
#             extracted = str(result)
#
#         extracted = extracted.strip()
#         if not extracted:
#             return error_response("No text extracted from image", 400)
@bp.route('/analyze_image', methods=['POST'])
def analyze_image():
    """Run OCR on an uploaded image and analyse the extracted text.

    Expects a multipart upload under the ``file`` field. The image is decoded
    directly from the upload stream, converted to RGB and passed to
    ``ocr_pipe``; the ``generated_text`` of the first OCR result is then run
    through ``analyze_text_internal`` and ``generate_tags``.

    Returns:
        A JSON response with ``extracted_text``, ``sentiment``, ``emotion``,
        ``confidence`` and ``tags`` on success; otherwise the value of
        ``error_response`` with status 400 (missing file, undecodable image,
        no text found) or 500 (any other exception while processing).
    """
    if 'file' not in request.files:
        return error_response("No image file provided", 400)

    file = request.files["file"]

    try:
        try:
            # Decoding from the stream avoids writing the upload to a
            # client-named file under /tmp that was never removed.
            with Image.open(file.stream) as uploaded:
                img = uploaded.convert("RGB")
        except OSError:
            # Pillow reports unidentifiable or truncated images as OSError
            # (UnidentifiedImageError is a subclass); that is a client error.
            return error_response("Invalid or unreadable image file", 400)

        # The OCR pipeline is expected to return a list of dicts.
        ocr_results = ocr_pipe(img)

        extracted = ""
        if isinstance(ocr_results, list) and ocr_results:
            first = ocr_results[0]
            if isinstance(first, dict) and "generated_text" in first:
                extracted = str(first["generated_text"]).strip()

        print("OCR extracted text:", extracted)

        # An unexpected result shape or blank output means there is nothing
        # meaningful to analyse; analysing the repr of the raw result would
        # only produce misleading sentiment.
        if not extracted:
            return error_response("No text extracted from image", 400)

        analysis = analyze_text_internal(extracted)
        if not isinstance(analysis, dict):
            # On failure the helper hands back an error response instead of a
            # result dict; forward it rather than indexing into it.
            return analysis

        tags = generate_tags(extracted)
        return jsonify({
            "extracted_text": extracted,
            "sentiment": analysis["sentiment"],
            "emotion": analysis["emotion"],
            "confidence": analysis["confidence"],
            "tags": tags
        })
    except Exception as e:
        return error_response(str(e), 500)






# Shared sentiment/emotion analysis used by the route handlers in this module.
def analyze_text_internal(text):
    """Classify ``text`` with the sentiment and emotion pipelines.

    Args:
        text: The string handed to ``sentiment_pipeline`` and
            ``emotion_pipeline``.

    Returns:
        On success, a dict with ``sentiment`` and ``emotion`` labels plus a
        ``confidence`` dict holding both scores rounded to three decimals.
        On any exception, the value of ``error_response(..., 500)`` — which is
        NOT such a dict, so callers should check the type before indexing.
    """
    try:
        # Sentiment: first entry of the pipeline output.
        sentiment = sentiment_pipeline(text)[0]

        # Dominant emotion: choose the highest-scoring candidate explicitly
        # rather than trusting the first entry. This is identical when the
        # candidates arrive sorted by score and still correct when they do not
        # (presumably possible depending on how emotion_pipeline is configured
        # — verify against its setup).
        emotion_candidates = emotion_pipeline(text)[0]
        emotion = max(emotion_candidates, key=lambda item: item['score'])

        return {
            "sentiment": sentiment['label'],
            "emotion": emotion['label'],
            "confidence": {
                "sentiment": round(sentiment['score'], 3),
                "emotion": round(emotion['score'], 3)
            }
        }
    except Exception as e:
        print(f"Analysis error: {str(e)}")
        return error_response(f"Processing error: {str(e)}", 500)


@bp.route('/analyze_text', methods=['POST'])
def analyze_text():
    """Analyse the sentiment, emotion and tags of a JSON-supplied text.

    Expects a JSON object with a string ``text`` field. The text is stripped
    and lower-cased, analysed by ``analyze_text_internal`` and tagged by
    ``generate_tags``.

    Returns:
        A JSON response with ``sentiment``, ``emotion``, ``confidence`` and
        ``tags`` on success; otherwise the value of ``error_response`` with
        status 400 (missing, non-string or blank text, or a body that is not
        a JSON object) or 500 (any exception while processing).
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same JSON error payload.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'text' not in data:
        return error_response("No text provided", 400)

    raw_text = data['text']
    # Reject non-strings and blank input up front; calling .strip() on e.g. a
    # number would otherwise raise outside the try block below.
    if not isinstance(raw_text, str) or not raw_text.strip():
        return error_response("No text provided", 400)

    text = raw_text.strip().lower()

    try:
        analysis = analyze_text_internal(text)
        if not isinstance(analysis, dict):
            # On failure the helper hands back an error response instead of a
            # result dict; forward it unchanged.
            return analysis

        tags = generate_tags(text)
    except Exception as e:
        print(f"Analysis error: {str(e)}")
        return error_response(f"Processing error: {str(e)}", 500)

    return jsonify({
        "sentiment": analysis["sentiment"],
        "emotion": analysis["emotion"],
        "confidence": analysis["confidence"],
        "tags": tags
    })


# 📌 3. Extract Actionable Tasks
@bp.route('/extract_actions', methods=['POST'])
def extract_actions():
    """Extract actionable tasks from a JSON-supplied text.

    Expects a JSON object with a ``text`` field, which is passed unchanged to
    ``extract_tasks``.

    Returns:
        A JSON response ``{"tasks": ...}`` on success; otherwise the value of
        ``error_response`` with status 400 (no ``text`` field, or a body that
        is not a JSON object) or 500 (any exception raised by
        ``extract_tasks``).
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same JSON error payload.
    data = request.get_json(silent=True)

    # A JSON array such as ["text"] would pass a bare membership test and then
    # fail on data['text'], so require an object explicitly.
    if not isinstance(data, dict) or 'text' not in data:
        return error_response("No text provided", 400)

    text = data['text']
    try:
        tasks = extract_tasks(text)
        return jsonify({"tasks": tasks})
    except Exception as e:
        return error_response(str(e), 500)


# =============================
# 🔹 Error Handling
# =============================

# Registered application-wide: a 404 for an unknown URL is raised during
# routing, before any blueprint is selected, so a blueprint-scoped
# errorhandler(404) would not be invoked for it.
@bp.app_errorhandler(404)
def not_found_error(error):
    """Answer HTTP 404 errors with the shared error payload.

    Args:
        error: The exception passed in by Flask; it is not used.

    Returns:
        The value of ``error_response("Not Found", 404)``.
    """
    return error_response("Not Found", 404)

@bp.errorhandler(500)
def internal_error(error):
    """Answer HTTP 500 errors handled by this blueprint with a fixed message.

    Args:
        error: The exception passed in by Flask; it is not used, so the
            response always carries the same generic text.

    Returns:
        The value of ``error_response("Internal Server Error", 500)``.
    """
    message, status_code = "Internal Server Error", 500
    return error_response(message, status_code)