File size: 1,204 Bytes
7e8e988
 
0b19cec
22379c6
 
7e8e988
22379c6
df4c46f
22379c6
 
 
 
 
 
 
0e48a80
 
0b19cec
 
 
 
 
 
df4c46f
7e8e988
df4c46f
 
7e8e988
 
0e48a80
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import asyncio

from fastapi import File, UploadFile, HTTPException

from ocr.api.message import ocr_router
from ocr.api.message.openai_request import generate_report, extract_original_text
from ocr.api.message.schemas import OcrResponse
from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
from ocr.core.wrappers import OcrResponseWrapper


@ocr_router.post('/parse')
async def get_all_chat_messages(
        file: UploadFile = File(...)
) -> OcrResponseWrapper[OcrResponse]:
    """Run OCR on an uploaded PDF or image and return the generated report.

    The upload is dispatched on its filename extension (case-insensitive):
    ``.pdf`` content is passed through ``divide_images``; ``.jpg``, ``.jpeg``
    and ``.png`` content is used as a single image. The text extracted from
    the images is then sent concurrently to ``extract_original_text`` and
    ``generate_report``.

    Args:
        file: The uploaded file taken from the multipart request body.

    Returns:
        An ``OcrResponseWrapper`` whose ``data`` is an ``OcrResponse`` holding
        the cleaned report (``text``) and the original text (``originalText``).

    Raises:
        HTTPException: 400 when the filename is missing or its extension is
            not one of the supported types.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    try:
        # The filename may be absent on a multipart part; treat that as an
        # unsupported type rather than crashing on ``None.endswith``.
        # Lower-casing lets uploads such as ``SCAN.PDF`` match as well.
        filename = (file.filename or '').lower()
        is_pdf = filename.endswith('.pdf')
        if not is_pdf and not filename.endswith(image_suffixes):
            raise HTTPException(status_code=400, detail='Unsupported file type.')

        # Read the body only after the type check so rejected uploads are
        # never loaded into memory.
        contents = await file.read()
        # NOTE(review): divide_images presumably yields one image per PDF
        # page -- confirm against ocr.api.message.utils.
        images = divide_images(contents) if is_pdf else [contents]

        text_content = extract_text_from_images(images)
        # The two requests only depend on the extracted text, so run them
        # concurrently.
        original_text, response = await asyncio.gather(
            extract_original_text(text_content),
            generate_report(text_content)
        )
        return OcrResponseWrapper(
            data=OcrResponse(text=clean_response(response), originalText=original_text)
        )
    finally:
        # Always release the upload, including on the 400 path above.
        await file.close()