LahiruD95 committed
Commit 038bb8a · Parent(s): 4e247c2

change routes.py

Files changed (1):
  1. app/routes.py +34 -21
app/routes.py CHANGED
@@ -3,6 +3,7 @@ from werkzeug.utils import secure_filename
 import os
 import easyocr
 import pytesseract  # Ensure this is imported
+import base64
 from huggingface_hub import InferenceApi
 
 from PIL import Image
@@ -11,6 +12,10 @@ from app.config import Config
 from app.models import audio_model, sentiment_pipeline, emotion_pipeline, client
 from app.services import extract_tasks
 from app.utils import generate_tags, error_response
+from transformers import pipeline
+from PIL import Image
+from werkzeug.utils import secure_filename
+
 
 # Initialize Flask Blueprint
 bp = Blueprint('main', __name__)
@@ -26,6 +31,10 @@ MIN_SENTIMENT_CONFIDENCE = 0.4  # Below this becomes "neutral"
 # =============================
 # 🔹 API Routes
 # =============================
+ocr_pipe = pipeline(
+    "image-to-text",
+    model="microsoft/trocr-base-handwritten"  # or "microsoft/trocr-base-printed"
+)
 
 @bp.route('/transcribe', methods=['POST'])
 def transcribe():
@@ -88,29 +97,33 @@ def analyze_image():
     if 'file' not in request.files:
         return error_response("No image file provided", 400)
 
-    f = request.files['file']
-    path = os.path.join("/tmp", secure_filename(f.filename))
-    f.save(path)
-
-    # read raw bytes
-    with open(path, "rb") as img_f:
-        img_bytes = img_f.read()
+    file = request.files["file"]
+    path = "/tmp/" + secure_filename(file.filename)
+    file.save(path)
 
+    # # read raw bytes and base64-encode for JSON serialization
+    # with open(path, "rb") as img_f:
+    #     raw_bytes = img_f.read()
+    # b64_str = base64.b64encode(raw_bytes).decode("utf-8")
+    #
     try:
-        # 1) Ask the vision-LLM to describe / extract text
-        completion = client.chat.completions.create(
-            model="google/gemma-3-27b-it",
-            messages=[{
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Extract any text you see in this image."},
-                    {"type": "image_bytes", "image_bytes": {"data": img_bytes}}
-                ]
-            }],
-            max_tokens=512,
-        )
-
-        extracted = completion.choices[0].message.content.strip();
+        # # 1) Ask the vision-LLM to extract text, passing base64 string
+        # completion = client.chat.completions.create(
+        #     model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
+        #     messages=[{
+        #         "role": "user",
+        #         "content": [
+        #             {"type": "text", "text": "Extract any text you see in this image."},
+        #             {"type": "image_bytes", "image_bytes": {"data": b64_str}}
+        #         ]
+        #     }],
+        #     max_tokens=512,
+        # )
+        img = Image.open(path).convert("RGB")
+
+        extracted = ocr_pipe(img)
+
+        print(extracted)
 
         analysis = analyze_text_internal(extracted)
         tags = generate_tags(extracted)
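
Note on the new OCR path: a transformers "image-to-text" pipeline returns a list of generations, typically [{"generated_text": "..."}], rather than a plain string, so as written `extracted` is a list when it reaches analyze_text_internal() and generate_tags(). A minimal sketch of the unpacking the route likely still needs; the image path is hypothetical, everything else mirrors the diff:

    from PIL import Image
    from transformers import pipeline

    ocr_pipe = pipeline(
        "image-to-text",
        model="microsoft/trocr-base-handwritten"  # or "microsoft/trocr-base-printed"
    )

    # Hypothetical input path; the route saves uploads under /tmp
    img = Image.open("/tmp/sample.png").convert("RGB")

    # The pipeline returns a list of dicts, e.g. [{"generated_text": "..."}]
    results = ocr_pipe(img)
    extracted = results[0]["generated_text"].strip() if results else ""

    print(extracted)  # now a plain string, safe to hand to text analysis

Worth noting as well: the TrOCR checkpoints are trained on single lines of text, so multi-line images generally need line segmentation before they OCR well.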
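
On the commented-out vision-LLM attempt: the OpenAI-style chat payload that client.chat.completions.create() accepts has no "image_bytes" content type, which is the likely reason that path was abandoned. The conventional shape for a local image is an "image_url" part carrying a base64 data URL. A sketch under that assumption (the client construction and file path are hypothetical, the model name is the one from the diff, and whether its endpoint accepts data URLs is itself an assumption):

    import base64
    from huggingface_hub import InferenceClient

    client = InferenceClient()  # hypothetical; the app's client comes from app.models

    with open("/tmp/sample.png", "rb") as img_f:  # hypothetical path
        b64_str = base64.b64encode(img_f.read()).decode("utf-8")

    completion = client.chat.completions.create(
        model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract any text you see in this image."},
                # OpenAI-compatible image part: a data URL, not raw bytes
                {"type": "image_url", "image_url": {"url": "data:image/png;base64," + b64_str}},
            ],
        }],
        max_tokens=512,
    )

    extracted = completion.choices[0].message.content.strip()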