ArcMa committed on
Commit
68b7b14
·
1 Parent(s): fc58215
Files changed (3) hide show
  1. Dockerfile +11 -0
  2. requirements.txt +7 -0
  3. run.py +70 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["python", "run.py"]
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ requests
2
+ transformers
3
+ torch
4
+ Pillow
5
+ flask
6
+ werkzeug
7
+
run.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import base64
3
+ import io
4
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
5
+ from PIL import Image
6
+ import torch
7
+ import time
8
+
9
# Load the processor and the encoder-decoder model once, at import time, from
# the same checkpoint, then switch the model to evaluation mode.
_CHECKPOINT = "arcma/decap"
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT).eval()
12
+
13
def check(x):
    """Tell whether ``x`` is an acceptable decoded candidate.

    A candidate is accepted only when it has at least six characters and
    every one of them is an ASCII digit or a lowercase ASCII letter.

    Returns:
        bool: True if ``x`` is acceptable, False otherwise.
    """
    permitted = set('1234567890abcdefghijklmnopqrstuvwxyz')
    # Length is tested first, so short inputs are rejected before the
    # character set of ``x`` is ever built.
    return len(x) >= 6 and set(x) <= permitted
19
+
20
def process_image(image):
    """Run the OCR model on ``image`` and return the first valid candidate.

    The image is preprocessed with the module-level ``processor``, decoded
    with beam search (4 beams, 4 returned sequences), and each decoded string
    is validated with ``check``. Candidates are examined in the order
    ``batch_decode`` returns them.

    Args:
        image: The input image; the caller in this file passes a PIL image.

    Returns:
        str: The first decoded candidate for which ``check`` returns True.

    Raises:
        IndexError: If none of the decoded candidates passes ``check``.
            (The exception type is unchanged from the original
            ``generated_text[0]`` lookup; it now carries a message.)
    """
    # The TrOCR usage examples feed RGB images to the processor; normalise
    # other PIL modes (grayscale, palette, RGBA, ...) instead of letting
    # preprocessing fail or mis-handle the channel layout.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    pixel_values = processor(image, return_tensors="pt").pixel_values
    with torch.no_grad():  # inference only, no gradients needed
        generated_ids = model.generate(pixel_values, num_beams=4, num_return_sequences=4)
    candidates = processor.batch_decode(generated_ids, skip_special_tokens=True)

    # Return as soon as one candidate validates rather than filtering all.
    for candidate in candidates:
        if check(candidate):
            return candidate
    raise IndexError("no decoded candidate passed validation")
27
+
28
def process_html(html):
    """Extract the inline base64 image from ``html`` and run OCR on it.

    The payload is the text found between the inline-style ``data:image/jpg``
    prefix and the ``') no-repeat`` suffix. It is base64-decoded, opened as an
    image, and handed to ``process_image``.

    Args:
        html: The markup string containing the inline background image.

    Returns:
        Whatever ``process_image`` returns for the decoded image.
    """
    start_marker = '''" style="background:white url('data:image/jpg;base64,'''
    end_marker = "') no-repeat"

    # Keep what follows the start marker, then what precedes the end marker.
    _, _, tail = html.partition(start_marker)
    encoded, _, _ = tail.partition(end_marker)

    image_bytes = base64.b64decode(encoded)
    orig_im = Image.open(io.BytesIO(image_bytes))
    return process_image(orig_im)
41
+
42
+
43
+
44
+ from werkzeug.wrappers import Request, Response
45
+ from flask import Flask, request
46
+ from flask import jsonify
47
+
48
+ app = Flask(__name__)
49
+
50
@app.route("/", methods=['POST', 'OPTIONS'])
def hello():
    """Handle the OCR endpoint.

    For POST, reads the ``data`` field of the JSON body, passes it to
    ``process_html`` and returns ``{"x": <result>}`` as JSON. If anything
    goes wrong the failure is logged with its traceback and the plain text
    ``"Hello World!"`` is returned, as before.

    OPTIONS requests are answered immediately with the same plain-text body
    instead of going through the JSON/OCR path only to fail there.

    Returns:
        A JSON response on success, otherwise the string ``"Hello World!"``.
    """
    if request.method == 'OPTIONS':
        return "Hello World!"
    try:
        return jsonify({
            'x': process_html(request.json['data'])
        })
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate; the traceback is
        # logged instead of being discarded.
        app.logger.exception('fail')
    return "Hello World!"
59
+
60
@app.after_request
def after_request(response):
    """Append the CORS headers to ``response`` and return it.

    Args:
        response: The outgoing response object; its ``headers.add`` method is
            called once per header below.

    Returns:
        The same ``response`` object, with the headers added.
    """
    # NOTE(review): a wildcard Allow-Origin together with
    # Allow-Credentials "true" is, as far as I know, not honoured by browsers
    # for credentialed requests - confirm whether credentials are needed.
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Credentials", "true"),
        ("Access-Control-Allow-Methods", "GET,HEAD,OPTIONS,POST,PUT"),
        ("Access-Control-Allow-Headers", "Access-Control-Allow-Headers, Origin,Accept, X-Requested-With, Content-Type, Access-Control-Request-Method, Access-Control-Request-Headers"),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response
67
+
68
if __name__ == '__main__':
    # When executed as a script, serve ``app`` with Werkzeug's built-in
    # server on all interfaces, port 7860.
    from werkzeug.serving import run_simple

    run_simple(hostname='0.0.0.0', port=7860, application=app)