AxleToe committed on
Commit
066a23d
·
verified ·
1 Parent(s): 1a04e0c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ from PIL import Image
5
+ from transformers import VisionEncoderDecoderModel, TrOCRProcessor
6
+ import torch
7
+
8
# Startup: choose the compute device and load the OCR processor and model once,
# at import time, so that solve_captcha() can reuse them on every request.
_MODEL_ID = "anuashok/ocr-captcha-v3"

print("--- Initializing Solver Service ---")

# Run on CUDA when torch reports it as available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# --- Heavy loading: performed a single time, when the module is executed ---
print("1. Loading TrOCR processor...")
processor = TrOCRProcessor.from_pretrained(_MODEL_ID, use_fast=True)
print(" - Processor loaded.")

print("2. Loading VisionEncoderDecoder model...")
model = VisionEncoderDecoderModel.from_pretrained(_MODEL_ID)
model = model.to(device)  # move the weights onto the selected device
print(" - Model loaded.")
print(f"--- Model is running on: {device.upper()} ---")
# --- End of heavy loading ---
23
+
24
+
25
def solve_captcha(input_image: Image.Image) -> str:
    """
    Solve a CAPTCHA image with the pre-loaded TrOCR processor and model.

    The image is composited onto a white background (so transparent regions
    become white), converted to RGB, and then run through the module-level
    ``processor`` and ``model`` on ``device``.

    Args:
        input_image: The CAPTCHA as a PIL image. Gradio passes ``None`` when
            a request is submitted without an image.

    Returns:
        The first decoded sequence produced by the model, with special
        tokens removed.

    Raises:
        gr.Error: If ``input_image`` is ``None``.
    """
    print("--- Received image for solving ---")

    # Without this guard a missing image crashes below with an opaque
    # AttributeError on ``None.convert``; gr.Error surfaces a readable
    # message to the caller instead.
    if input_image is None:
        raise gr.Error("No image provided. Please upload a CAPTCHA image.")

    # 1. Convert to RGBA so the image has an alpha channel to composite with.
    image = input_image.convert("RGBA")

    # 2. Prepare a white background of the same size.
    background = Image.new("RGBA", image.size, (255, 255, 255))

    # 3. Composite the image onto the white background, then drop the alpha
    #    channel by converting to RGB.
    combined = Image.alpha_composite(background, image).convert("RGB")
    print(" - Image pre-processing complete.")

    # 4. Turn the image into model-ready tensors on the selected device.
    pixel_values = processor(images=combined, return_tensors="pt").pixel_values.to(device)
    print(" - Image prepared for model.")

    # 5. Run model inference.
    generated_ids = model.generate(pixel_values)
    print(" - Model inference complete.")

    # 6. Decode the token ids; a single image was submitted, so take the
    #    first (only) entry of the decoded batch.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(f" - Decoding complete. Result: {generated_text}")

    return generated_text
55
+
56
+
57
# --- Build the Gradio interface (and its API endpoint), then start serving ---
captcha_input = gr.Image(type="pil", label="Upload CAPTCHA Image")
result_output = gr.Textbox(label="Result")

demo = gr.Interface(
    fn=solve_captcha,
    inputs=captcha_input,
    outputs=result_output,
    title="TrOCR CAPTCHA Solver (Custom Logic)",
    description="An API for the anuashok/ocr-captcha-v3 model using specific pre-processing.",
)
demo.launch()