File size: 2,164 Bytes
68b7b14
 
 
 
 
 
 
 
 
 
 
a1b736d
68b7b14
 
 
 
 
 
 
 
0eecb1c
 
68b7b14
0eecb1c
68b7b14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0eecb1c
 
68b7b14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
import base64
import io
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch
import time

# Load the processor and the encoder-decoder model once at import time so that
# every request reuses the same objects instead of reloading them.
_CHECKPOINT = "arcma/decap"
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
model.eval()  # evaluation mode: this module only runs inference
# Optional: torch.compile(model) -- currently disabled.

def check(x):
    """Return True when *x* looks like a plausible decoded captcha.

    A candidate is accepted only if it has at least six characters and every
    character is a digit or a lowercase ASCII letter.
    """
    allowed = set('1234567890abcdefghijklmnopqrstuvwxyz')
    return len(x) >= 6 and set(x) <= allowed

def process_image(pixel_values):
    """Decode captcha text from preprocessed pixel values.

    Runs generation with a single beam and a single returned sequence on the
    module-level ``model``, decodes the generated ids with the module-level
    ``processor`` and returns the first decoded string accepted by ``check``.

    Args:
        pixel_values: The ``pixel_values`` tensor produced by ``processor``.

    Returns:
        The first decoded string that passes ``check``.

    Raises:
        IndexError: If none of the decoded strings passes ``check``.
    """
    # NOTE: deliberately not decorated with ``torch.jit.script``. TorchScript
    # cannot compile calls into the Python-level ``model.generate`` /
    # ``processor.batch_decode`` globals, so scripting this function fails as
    # soon as the module is imported.
    with torch.no_grad():
        generated_ids = model.generate(
            pixel_values, num_beams=1, num_return_sequences=1
        )
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    for text in decoded:
        if check(text):
            return text
    # Same exception type the old ``[...][0]`` lookup produced, but with a
    # message that says what actually went wrong.
    raise IndexError("no decoded text passed validation")

def process_html(html):
    """Extract the inline captcha image from *html* and return its text.

    The image is expected as a base64 ``data:image/jpg`` URL inside an inline
    ``style="background:white url('...') no-repeat"`` attribute. The payload
    between those two markers is decoded, opened with PIL, run through the
    module-level ``processor`` and handed to ``process_image``.

    Args:
        html: HTML fragment (string) containing the inline background image.

    Returns:
        Whatever ``process_image`` returns for the extracted image.

    Raises:
        ValueError: If the opening marker is absent or the payload is empty
            (``binascii.Error`` from a malformed payload is a ``ValueError``
            subclass as well).
    """
    prefix = '''" style="background:white url('data:image/jpg;base64,'''
    suffix = "') no-repeat"

    _, marker, tail = html.partition(prefix)
    # A missing suffix keeps the historical behaviour: decode everything
    # after the prefix.
    encoded = tail.partition(suffix)[0]
    if not marker or not encoded:
        # Fail early with a clear message instead of letting PIL choke on an
        # empty byte string.
        raise ValueError("no embedded base64 captcha image found in HTML")

    image_bytes = base64.b64decode(encoded)
    # Normalise to three channels: JPEGs may be grayscale or CMYK, and the
    # image processor is documented with RGB input.
    orig_im = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    pixel_values = processor(orig_im, return_tensors="pt").pixel_values
    return process_image(pixel_values)



from werkzeug.wrappers import Request, Response
from flask import Flask, request
from flask import jsonify

# Flask application object; the route and the after-request hook defined below
# are registered on it.
app = Flask(__name__)

@app.route("/", methods=['POST', 'OPTIONS'])
def hello():
    """Solve the captcha embedded in the posted HTML.

    Expects a JSON body with a ``data`` field holding the HTML fragment and
    responds with ``{"x": <decoded text>}``. OPTIONS requests and any failure
    while handling a POST receive the plain ``"Hello World!"`` body, as
    before, so existing clients see the same responses.
    """
    if request.method == 'OPTIONS':
        # A CORS preflight carries no JSON body; answer it directly instead of
        # sending it through the OCR path and logging a bogus failure.
        return "Hello World!"
    try:
        return jsonify({
            'x': process_html(request.json['data'])
        })
    except Exception:
        # Narrowed from a bare ``except`` so SystemExit/KeyboardInterrupt
        # propagate; log the traceback so failures can be diagnosed.
        app.logger.exception('captcha OCR request failed')
    return "Hello World!"

@app.after_request
def after_request(response):
    """Append the CORS headers to every outgoing response and return it."""
    # NOTE(review): browsers refuse credentialed requests when the allowed
    # origin is the "*" wildcard -- confirm whether the credentials header is
    # actually needed.
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Credentials", "true"),
        ("Access-Control-Allow-Methods", "GET,HEAD,OPTIONS,POST,PUT"),
        (
            "Access-Control-Allow-Headers",
            "Access-Control-Allow-Headers, Origin,Accept, X-Requested-With, Content-Type, Access-Control-Request-Method, Access-Control-Request-Headers",
        ),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response

if __name__ == '__main__':
    # Script entry point: serve the app with werkzeug's built-in server,
    # listening on every interface on port 7860.
    from werkzeug.serving import run_simple

    run_simple(hostname='0.0.0.0', port=7860, application=app)