Spaces:
Build error
Build error
| import uvicorn | |
| from fastapi.staticfiles import StaticFiles | |
| import hashlib | |
| from enum import Enum | |
| from fastapi import FastAPI, UploadFile, File | |
| from paddleocr import PaddleOCR, PPStructure, save_structure_res | |
| from PIL import Image | |
| import io | |
| import numpy as np | |
# Runtime settings read by the upload handlers further down.
output_dir = 'output'  # folder passed to save_structure_res for saved results
use_gpu = False  # forwarded to get_ocr() when an OCR engine is requested

# Application instance; docs_url='/' serves the interactive API docs at the root path.
app = FastAPI(docs_url='/')
class LangEnum(str, Enum):
    # Language codes accepted by the ``lang`` parameter of the upload handlers.
    # The ``str`` mix-in makes every member compare equal to its plain string
    # value, so members can be used wherever a language string is expected.
    ch = "ch"
    en = "en"
# Cache of PaddleOCR engines keyed by (lang, use_gpu), so each configuration is
# constructed at most once per process.
ocr_cache = {}


def get_ocr(lang, use_gpu=False):
    """Return the cached PaddleOCR engine for ``lang``, creating it on first use.

    Args:
        lang: Language code forwarded to ``PaddleOCR(lang=...)``.
        use_gpu: Forwarded to ``PaddleOCR(use_gpu=...)``.

    Returns:
        The engine stored in ``ocr_cache`` for this ``(lang, use_gpu)`` pair.
    """
    # Key on both arguments: keying on ``lang`` alone would hand back an engine
    # that was built with a different ``use_gpu`` setting than the one requested.
    key = (lang, bool(use_gpu))
    if key not in ocr_cache:
        ocr_cache[key] = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
    return ocr_cache[key]
# NOTE(review): no route decorator is visible on this handler in this view of
# the file; confirm that it is registered on ``app``.
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
    # use_gpu: bool = False
):
    """Run OCR on an uploaded image and return the recognised text lines.

    Each item of the returned list holds ``boxes`` (the detected region),
    ``txt`` (the recognised text) and ``score`` (the value reported alongside
    the text by the OCR engine). An empty list is returned when nothing is
    recognised. A file that cannot be decoded as an image yields HTTP 400.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    contents = await file.read()
    try:
        # Image.open is lazy, so the conversion to an array is what actually
        # decodes the pixels; both steps can fail on a bad upload.
        pixels = np.array(Image.open(io.BytesIO(contents)))
    except OSError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image",
        ) from exc

    engine = get_ocr(lang=lang, use_gpu=use_gpu)
    # A single image was submitted, so only the first entry is relevant. That
    # entry may be None when no text is detected, hence the ``or []``.
    detections = engine.ocr(pixels, cls=True)[0] or []

    # Recognition result: one dict per detected text line.
    return [
        dict(boxes=line[0], txt=line[1][0], score=line[1][1])
        for line in detections
    ]
# Cache of PPStructure engines keyed by language, so an engine is built once
# per language instead of on every request.
_table_engine_cache = {}


def _get_table_engine(lang):
    """Return the cached PPStructure engine for ``lang``, creating it on first use."""
    if lang not in _table_engine_cache:
        _table_engine_cache[lang] = PPStructure(show_log=True, table=True, lang=lang)
    return _table_engine_cache[lang]


def _region_html(region):
    """Return the ``html`` entry of a region's ``res``, or '' when there is none."""
    res = region.get('res')
    # Only a dict-shaped ``res`` can carry an 'html' entry; any other shape
    # (for example a list) is reported as an empty string instead of raising.
    return res.get('html', '') if isinstance(res, dict) else ''


# NOTE(review): this handler has the same name as the OCR handler defined
# earlier in the file, so the module-level name refers to this one. No route
# decorator is visible in this view; confirm both handlers are registered.
# NOTE(review): the module-level ``use_gpu`` switch is not forwarded to
# PPStructure here, unlike the OCR handler; confirm whether that is intended.
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
    # use_gpu: bool = False
):
    """Analyse the layout of an uploaded image and return its regions.

    The response contains parallel lists ``htmls``, ``bboxes`` and ``types``
    with one entry per detected region, plus ``hash``, the SHA-256 hex digest
    of the uploaded bytes, which is also the name the results are saved under.
    A file that cannot be decoded as an image yields HTTP 400.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    contents = await file.read()
    # Hash of the file contents; identifies the saved results for this upload.
    file_hash = hashlib.sha256(contents).hexdigest()
    try:
        # Image.open is lazy, so the conversion to an array is what actually
        # decodes the pixels; both steps can fail on a bad upload.
        pixels = np.array(Image.open(io.BytesIO(contents)))
    except OSError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image",
        ) from exc

    regions = _get_table_engine(lang)(pixels)
    save_structure_res(regions, output_dir, file_hash)

    return {
        'htmls': [_region_html(region) for region in regions],
        'hash': file_hash,
        'bboxes': [region.get('bbox', '') for region in regions],
        'types': [region.get('type', '') for region in regions],
    }
if __name__ == '__main__':
    import os  # local import: only needed for this start-up step

    # StaticFiles refuses to mount a directory that does not exist, and nothing
    # before this point creates the output folder, so create it up front.
    os.makedirs(output_dir, exist_ok=True)
    # Expose saved results under /output.
    app.mount("/output", StaticFiles(directory=output_dir, follow_symlink=True, html=True), name="output")
    # NOTE(review): no host is passed, so uvicorn's default bind address is
    # used; confirm that is reachable in the target deployment.
    uvicorn.run(app=app, port=7860)