arcma committed on
Commit
20bc8b5
·
0 Parent(s):

Duplicate from arcma/decap

Browse files
Files changed (5) hide show
  1. .gitattributes +34 -0
  2. Dockerfile +15 -0
  3. README.md +11 -0
  4. requirements.txt +8 -0
  5. run.py +71 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the service started by run.py.
FROM python:3.9

# Create and switch to an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user
USER user
WORKDIR /home/user

# Copy the dependency list on its own so the install layers below can be
# reused from cache when only application code changes.
COPY ./requirements.txt /home/user/requirements.txt

# Nightly CPU wheels for PyTorch, installed before the remaining requirements.
RUN pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
RUN pip install --no-cache-dir --upgrade -r /home/user/requirements.txt

COPY --chown=user . .
# This used to be a second CMD. Only the last CMD in a Dockerfile takes
# effect, so the chmod was silently dropped; RUN executes it at build time.
RUN chmod -R 777 /home/user

CMD ["python", "run.py"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Decap
3
+ emoji: 😻
4
+ colorFrom: indigo
5
+ colorTo: yellow
6
+ sdk: docker
7
+ pinned: false
8
+ duplicated_from: arcma/decap
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ requests
2
+ transformers
3
+ torch
4
+ Pillow
5
+ flask
6
+ werkzeug
7
+ sentencepiece
8
+ protobuf==3.20.0
run.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import base64
3
+ import io
4
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
5
+ from PIL import Image
6
+ import torch
7
+ import time
8
+
9
# The processor and the encoder-decoder model are loaded from the same
# checkpoint identifier, once, at import time.
_CHECKPOINT = "arcma/decap"

processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
# Switch to evaluation mode; the module is only used for generation below.
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT).eval()
# torch.compile(model) is intentionally left disabled.
13
+
14
# Characters a decoded candidate may contain: digits and lowercase ASCII letters.
_ALLOWED_CHARS = frozenset('1234567890abcdefghijklmnopqrstuvwxyz')


def check(x):
    """Return True when *x* is an acceptable decoded candidate.

    A candidate is accepted when it has at least 6 characters and every
    character is a digit or a lowercase ASCII letter.

    Args:
        x: The decoded text to validate.

    Returns:
        bool: True if *x* passes both checks, False otherwise.
    """
    return len(x) >= 6 and _ALLOWED_CHARS.issuperset(x)
20
+
21
def process_image(image):
    """Run the model on *image* and return the first candidate that passes `check`.

    Four sequences are generated with beam search (4 beams), decoded to
    text, and scanned in the order the model returned them.

    Args:
        image: The image handed to the processor.

    Returns:
        The first decoded string for which `check` returns True.

    Raises:
        IndexError: If none of the decoded candidates passes `check`.
    """
    pixel_values = processor(image, return_tensors="pt").pixel_values
    with torch.no_grad():
        generated_ids = model.generate(
            pixel_values, num_beams=4, num_return_sequences=4
        )
    candidates = processor.batch_decode(generated_ids, skip_special_tokens=True)
    for text in candidates:
        if check(text):
            return text
    # Same exception type as indexing an empty list, but with a message that
    # says what went wrong and which candidates were rejected.
    raise IndexError(f"no decoded candidate passed validation: {candidates!r}")
28
+
29
def process_html(html):
    """Extract the inline base64 image from *html* and run recognition on it.

    The image is expected as a data URI inside a ``style`` attribute; the
    text between the fixed prefix and suffix markers below is base64-decoded
    and opened with PIL.

    Args:
        html: HTML text containing the inline background image.

    Returns:
        The result of `process_image` for the extracted image.

    Raises:
        ValueError: If the data-URI prefix marker is not present in *html*.
    """
    prefix = '''" style="background:white url('data:image/jpg;base64,'''
    suffix = "') no-repeat"

    _, marker, remainder = html.partition(prefix)
    if not marker:
        # Without the prefix the payload would be empty and the image open
        # below would fail with an unhelpful error; report the real cause.
        raise ValueError("no inline base64 image found in HTML")

    # If the suffix is absent, partition leaves the whole remainder as payload.
    encoded = remainder.partition(suffix)[0]
    orig_im = Image.open(io.BytesIO(base64.b64decode(encoded)))
    return process_image(orig_im)
42
+
43
+
44
+
45
+ from werkzeug.wrappers import Request, Response
46
+ from flask import Flask, request
47
+ from flask import jsonify
48
+
49
+ app = Flask(__name__)
50
+
51
@app.route("/", methods=['POST', 'OPTIONS'])
def hello():
    """Handle a recognition request.

    Expects a JSON body with a ``data`` key holding the HTML to process and
    responds with ``{"x": <recognized text>}``. Any failure is logged with
    its traceback and answered with the plain string "Hello World!".

    Returns:
        A JSON response on success, otherwise the string "Hello World!".
    """
    if request.method == 'OPTIONS':
        # A preflight request has no JSON payload to process; answer directly
        # instead of recording it as a failure.
        return "Hello World!"
    try:
        return jsonify({
            'x': process_html(request.json['data'])
        })
    except Exception:
        # Catch Exception rather than everything so SystemExit and
        # KeyboardInterrupt still propagate; keep the traceback in the log.
        app.logger.exception('fail')
        return "Hello World!"
60
+
61
@app.after_request
def after_request(response):
    """Append the CORS headers to every outgoing response and return it."""
    # NOTE(review): a wildcard Allow-Origin combined with
    # Allow-Credentials: true is rejected by browsers for credentialed
    # requests - confirm whether the credentials header is actually needed.
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Credentials", "true"),
        ("Access-Control-Allow-Methods", "GET,HEAD,OPTIONS,POST,PUT"),
        (
            "Access-Control-Allow-Headers",
            "Access-Control-Allow-Headers, Origin,Accept, X-Requested-With, Content-Type, Access-Control-Request-Method, Access-Control-Request-Headers",
        ),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response
68
+
69
if __name__ == '__main__':
    # Serve the Flask app with werkzeug's built-in server on all interfaces,
    # port 7860, when this file is executed directly.
    from werkzeug.serving import run_simple

    run_simple(hostname='0.0.0.0', port=7860, application=app)