from flask import Blueprint, request, jsonify
from werkzeug.utils import secure_filename
import os

from PIL import Image
from transformers import pipeline

from app.models import audio_model, sentiment_pipeline, emotion_pipeline
from app.services import extract_tasks
from app.utils import generate_tags, error_response

# Initialize Flask Blueprint
bp = Blueprint('main', __name__)

# ── OCR via a local TrOCR pipeline ───────────────────────────────────────────
# Microsoft's TrOCR, handwritten variant; swap in the printed variant as noted.
EMOTION_SCORE_THRESHOLD = 0.15   # Adjust based on your testing (currently unused)
MIN_SENTIMENT_CONFIDENCE = 0.4   # Below this becomes "neutral" (currently unused)

ocr_pipe = pipeline(
    "image-to-text",
    model="microsoft/trocr-base-handwritten"  # or "microsoft/trocr-base-printed"
)

# =============================
# 🔹 API Routes
# =============================

@bp.route('/transcribe', methods=['POST'])
def transcribe():
    if 'file' not in request.files:
        return error_response("No file provided", 400)

    file = request.files['file']
    file_path = os.path.join("/tmp", secure_filename(file.filename))
    file.save(file_path)

    try:
        # Transcribe audio
        result = audio_model.transcribe(file_path)
        transcription = result.get("text", "")

        if not transcription.strip():
            return error_response("Transcription is empty", 400)

        # Run the same analysis as the /analyze_text route
        analysis = analyze_text_internal(transcription)
        tags = generate_tags(transcription)  # Extract tags from text

        return jsonify({
            "transcription": transcription,
            "sentiment": analysis["sentiment"],
            "emotion": analysis["emotion"],
            "confidence": analysis["confidence"],
            "tags": tags
        })
    except Exception as e:
        return error_response(str(e), 500)


# Earlier drafts OCR'd the image remotely — first via the Hugging Face
# InferenceApi, then via a vision LLM
# (mistralai/Mistral-Small-3.1-24B-Instruct-2503) through
# client.chat.completions with a base64-encoded image. Both were replaced
# by the local TrOCR pipeline below.
@bp.route('/analyze_image', methods=['POST'])
def analyze_image():
    if 'file' not in request.files:
        return error_response("No image file provided", 400)

    file = request.files['file']
    path = os.path.join("/tmp", secure_filename(file.filename))
    file.save(path)

    try:
        img = Image.open(path).convert("RGB")

        # Run the OCR pipeline, which returns a list of dicts
        ocr_results = ocr_pipe(img)

        # Extract the generated text from the first result
        if isinstance(ocr_results, list) and len(ocr_results) > 0 and "generated_text" in ocr_results[0]:
            extracted = ocr_results[0]["generated_text"].strip()
        else:
            # Fall back to stringifying whatever we got
            extracted = str(ocr_results)

        print("OCR extracted text:", extracted)

        if not extracted:
            return error_response("No text extracted from image", 400)

        # Now analyze the extracted string
        analysis = analyze_text_internal(extracted)
        tags = generate_tags(extracted)

        return jsonify({
            "extracted_text": extracted,
            "sentiment": analysis["sentiment"],
            "emotion": analysis["emotion"],
            "confidence": analysis["confidence"],
            "tags": tags
        })
    except Exception as e:
        return error_response(str(e), 500)


# Internal helper shared by /transcribe and /analyze_image. It raises on
# failure rather than returning error_response(), so callers can index the
# result like a dict and handle errors in their own try/except blocks.
def analyze_text_internal(text):
    # Sentiment (positive/neutral/negative)
    sentiment = sentiment_pipeline(text)[0]

    # Dominant emotion (anger/disgust/fear/joy/neutral/sadness/surprise)
    emotion = emotion_pipeline(text)[0][0]

    return {
        "sentiment": sentiment['label'],
        "emotion": emotion['label'],
        "confidence": {
            "sentiment": round(sentiment['score'], 3),
            "emotion": round(emotion['score'], 3)
        }
    }


@bp.route('/analyze_text', methods=['POST'])
def analyze_text():
    data = request.json
    if not data or 'text' not in data:
        return error_response("No text provided", 400)

    text = data['text'].strip().lower()

    try:
        analysis = analyze_text_internal(text)
        analysis["tags"] = generate_tags(text)
        return jsonify(analysis)
    except Exception as e:
        print(f"Analysis error: {str(e)}")
        return error_response(f"Processing error: {str(e)}", 500)


# 📌 3. Extract Actionable Tasks
@bp.route('/extract_actions', methods=['POST'])
def extract_actions():
    data = request.json
    if not data or 'text' not in data:
        return error_response("No text provided", 400)

    text = data['text']
    try:
        tasks = extract_tasks(text)
        return jsonify({"tasks": tasks})
    except Exception as e:
        return error_response(str(e), 500)


# =============================
# 🔹 Error Handling
# =============================

# app_errorhandler is required here: a plain Blueprint.errorhandler(404)
# never fires, because 404s are raised during routing, before any blueprint
# has been selected.
@bp.app_errorhandler(404)
def not_found_error(error):
    return error_response("Not Found", 404)


@bp.app_errorhandler(500)
def internal_error(error):
    return error_response("Internal Server Error", 500)
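

# ── Example usage (illustrative) ─────────────────────────────────────────────
# A minimal sketch of how a client might exercise these routes. It assumes
# the blueprint is registered at the application root and the server is
# reachable at http://localhost:5000 — both are deployment assumptions, not
# guarantees made by this module.
#
#   import requests
#
#   # Text analysis: returns sentiment, emotion, confidence scores, and tags
#   r = requests.post("http://localhost:5000/analyze_text",
#                     json={"text": "Finally finished the report, what a relief!"})
#   print(r.json())
#
#   # Audio transcription: multipart upload, transcribed then analyzed
#   with open("note.wav", "rb") as f:
#       r = requests.post("http://localhost:5000/transcribe", files={"file": f})
#   print(r.json())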