# MindPalaceAI — app/routes.py
# (header lines from the hosting UI converted to comments so the module parses)
from flask import Blueprint, request, jsonify
from werkzeug.utils import secure_filename
import os
import pytesseract # Ensure this is imported
import base64
from huggingface_hub import InferenceApi
from PIL import Image
from app.config import Config
from app.models import audio_model, sentiment_pipeline, emotion_pipeline, client
from app.services import extract_tasks
from app.utils import generate_tags, error_response
from transformers import pipeline
from PIL import Image
from werkzeug.utils import secure_filename
# Initialize Flask Blueprint: all routes below are registered on `bp`,
# which the application factory mounts (see app package — not shown here).
bp = Blueprint('main', __name__)
# ── OCR via local transformers pipeline ──────────────────────────────────────
# Microsoft's TrOCR model is used for OCR (handwritten variant selected below).
# NOTE(review): these two thresholds are defined here but not referenced
# anywhere in this file — presumably consumed elsewhere or left over; verify.
EMOTION_SCORE_THRESHOLD = 0.15 # Adjust based on your testing
MIN_SENTIMENT_CONFIDENCE = 0.4 # Below this becomes "neutral"
# =============================
# API Routes
# =============================
# Loaded at import time: downloading/initializing the model is a module-level
# side effect, so the first import of this module is slow.
ocr_pipe = pipeline(
"image-to-text",
model="microsoft/trocr-base-handwritten" # or "microsoft/trocr-base-printed"
)
@bp.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file and analyze the resulting text.

    Expects a multipart upload under the ``file`` field.  Returns JSON with
    ``transcription``, ``sentiment``, ``emotion``, ``confidence`` and
    ``tags``; 400 on missing/empty input, 500 on processing failure.
    """
    if 'file' not in request.files:
        return error_response("No file provided", 400)
    file = request.files['file']
    # Guard against an empty filename: secure_filename('') is '', which would
    # make file_path point at the /tmp directory itself.
    if not file.filename:
        return error_response("No file provided", 400)
    # secure_filename() strips path separators so a crafted filename cannot
    # escape /tmp.
    file_path = os.path.join("/tmp", secure_filename(file.filename))
    file.save(file_path)
    try:
        # Transcribe audio with the shared Whisper-style model.
        result = audio_model.transcribe(file_path)
        transcription = result.get("text", "")
        if not transcription.strip():
            return error_response("Transcription is empty", 400)
        # Reuse the same analysis logic as the /analyze_text route.
        analysis_response = analyze_text_internal(transcription)
        tags = generate_tags(transcription)  # Extract tags from the text
        return jsonify({
            "transcription": transcription,
            "sentiment": analysis_response["sentiment"],
            "emotion": analysis_response["emotion"],
            "confidence": analysis_response["confidence"],
            "tags": tags
        })
    except Exception as e:
        return error_response(str(e), 500)
    finally:
        # Bug fix: the temp file was previously leaked on every request.
        try:
            os.remove(file_path)
        except OSError:
            pass
# (Removed: a commented-out legacy /analyze_image implementation that sent raw
# image bytes to the HF Inference API; superseded by the local pipeline below.)
@bp.route('/analyze_image', methods=['POST'])
def analyze_image():
    """OCR an uploaded image with TrOCR, then analyze the extracted text.

    Expects a multipart upload under the ``file`` field.  Returns JSON with
    ``extracted_text``, ``sentiment``, ``emotion``, ``confidence`` and
    ``tags``; 400 on missing input or empty OCR result, 500 on failure.
    """
    if 'file' not in request.files:
        return error_response("No image file provided", 400)
    file = request.files["file"]
    # Guard against an empty filename (secure_filename('') is '').
    if not file.filename:
        return error_response("No image file provided", 400)
    path = os.path.join("/tmp", secure_filename(file.filename))
    file.save(path)
    try:
        # TrOCR expects RGB input; convert in case of grayscale/RGBA uploads.
        img = Image.open(path).convert("RGB")
        # The image-to-text pipeline returns a list of
        # {"generated_text": ...} dicts; take the first result.
        ocr_results = ocr_pipe(img)
        if isinstance(ocr_results, list) and ocr_results and "generated_text" in ocr_results[0]:
            extracted = ocr_results[0]["generated_text"].strip()
        else:
            # Fall back to stringifying whatever shape came back.
            extracted = str(ocr_results)
        print("OCR extracted text:", extracted)
        # Bug fix: previously an empty OCR result was passed on to the
        # analysis pipelines; reject it explicitly instead.
        if not extracted:
            return error_response("No text extracted from image", 400)
        analysis = analyze_text_internal(extracted)
        tags = generate_tags(extracted)
        return jsonify({
            "extracted_text": extracted,
            "sentiment": analysis["sentiment"],
            "emotion": analysis["emotion"],
            "confidence": analysis["confidence"],
            "tags": tags
        })
    except Exception as e:
        return error_response(str(e), 500)
    finally:
        # Bug fix: the temp file was previously leaked on every request.
        try:
            os.remove(path)
        except OSError:
            pass
# Internal helper shared by /transcribe and /analyze_image.
def analyze_text_internal(text):
    """Run the sentiment and emotion pipelines over *text*.

    Returns a dict with ``sentiment`` and ``emotion`` labels and a nested
    ``confidence`` dict of rounded scores.

    Bug fix: on failure this used to return ``error_response(...)`` (a Flask
    response), which callers then subscripted (``analysis["sentiment"]``),
    raising a confusing TypeError.  The exception now propagates so each
    route's own except-handler produces the 500 with the real message.
    """
    # [0] -> top prediction for the single input string.
    sentiment = sentiment_pipeline(text)[0]
    # The emotion pipeline returns a ranked list per input; [0][0] is the
    # dominant emotion (anger/disgust/fear/joy/neutral/sadness/surprise).
    emotion = emotion_pipeline(text)[0][0]
    return {
        "sentiment": sentiment['label'],
        "emotion": emotion['label'],
        "confidence": {
            "sentiment": round(sentiment['score'], 3),
            "emotion": round(emotion['score'], 3)
        }
    }
@bp.route('/analyze_text', methods=['POST'])
def analyze_text():
    """Analyze posted JSON ``{"text": ...}`` for sentiment, emotion and tags.

    Returns JSON with ``sentiment``, ``emotion``, ``confidence`` and
    ``tags``; 400 on missing text, 500 on pipeline failure.
    """
    # Bug fix: request.json raises its own HTML 400/415 on a non-JSON body;
    # get_json(silent=True) returns None instead so we emit our error shape.
    data = request.get_json(silent=True)
    if not data or 'text' not in data:
        return error_response("No text provided", 400)
    # NOTE(review): lowercasing is preserved from the original; confirm the
    # sentiment/emotion models are case-insensitive as assumed.
    text = data['text'].strip().lower()
    try:
        # [0] -> top prediction for the single input string.
        sentiment = sentiment_pipeline(text)[0]
        # Ranked emotion list per input; [0][0] is the dominant emotion.
        emotion = emotion_pipeline(text)[0][0]
        tags = generate_tags(text)
        # jsonify for consistency with the other routes (same JSON output).
        return jsonify({
            "sentiment": sentiment['label'],
            "emotion": emotion['label'],
            "confidence": {
                "sentiment": round(sentiment['score'], 3),
                "emotion": round(emotion['score'], 3)
            },
            "tags": tags
        })
    except Exception as e:
        print(f"Analysis error: {str(e)}")
        return error_response(f"Processing error: {str(e)}", 500)
# Extract actionable tasks from free text.
@bp.route('/extract_actions', methods=['POST'])
def extract_actions():
    """Return ``{"tasks": [...]}`` extracted from posted ``{"text": ...}``.

    Responds 400 when no text is supplied and 500 on extraction failure.
    """
    payload = request.json
    if not payload or 'text' not in payload:
        return error_response("No text provided", 400)
    try:
        return jsonify({"tasks": extract_tasks(payload['text'])})
    except Exception as exc:
        return error_response(str(exc), 500)
# =============================
# πŸ”Ή Error Handling
# =============================
@bp.errorhandler(404)
def not_found_error(error):
    """Return the shared JSON error shape for 404s raised in this blueprint.

    NOTE(review): a blueprint-level 404 handler does not catch app-wide
    unmatched URLs — confirm whether an app.errorhandler is also registered.
    """
    return error_response("Not Found", 404)
@bp.errorhandler(500)
def internal_error(error):
    """Return the shared JSON error shape for 500s raised in this blueprint."""
    return error_response("Internal Server Error", 500)