Spaces:
Running
Running
File size: 2,140 Bytes
ceaf2e8 c6954d9 d8475f5 ceaf2e8 c6954d9 ceaf2e8 f6cfeb8 ceaf2e8 3a4c6fc ceaf2e8 c6954d9 ceaf2e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import os
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from typing import List
from smoldocling import cli
import shutil
import dotenv
# Cache locations exported through environment variables (presumably read by
# the Hugging Face libraries used by smoldocling).
# NOTE(review): these are set after `smoldocling` has already been imported at
# the top of the file; if that import reads the variables eagerly, the values
# below may arrive too late -- confirm.
os.environ.update(
    {
        "TRANSFORMERS_CACHE": "/app/.cache/transformers",
        "HF_HUB_CACHE": "/app/.cache/hub",
    }
)

# Pull in settings from a .env file, if one is present.
dotenv.load_dotenv()

# The ASGI application object.
app = FastAPI()

# CORS is wide open (any origin, method and header); handy for dev/testing.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# Working directories: uploads are written to UPLOAD_DIR and processing
# results to OUTPUT_DIR. Both are created at import time if missing.
UPLOAD_DIR = "/tmp/uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)
OUTPUT_DIR = "/tmp/output"
os.makedirs(OUTPUT_DIR, exist_ok=True)
def docling_process_files(file_list: List[str]) -> str:
    """Run the smoldocling pipeline on ``file_list`` and return stitched text.

    Every path is handed to ``cli.process_files`` (JSON output written into
    ``OUTPUT_DIR``), but only the first path is used to locate the JSON
    result, generate the overlay HTML and produce the returned text.

    Args:
        file_list: Paths of the documents to process. Must not be empty.

    Returns:
        Whatever ``cli.stitch_text_from_json`` returns for the first file's
        JSON output, with GPT fixing disabled.

    Raises:
        ValueError: If ``file_list`` is empty.
    """
    if not file_list:
        # Fail fast with a clear message instead of running the pipeline on
        # nothing and then hitting an IndexError on file_list[0].
        raise ValueError("file_list must contain at least one file path.")

    cli.process_files(file_list, OUTPUT_DIR, output_format='json')

    # Normalise Windows-style separators so basename() finds the file name.
    file_path = file_list[0].replace('\\', '/')
    file_name = os.path.splitext(os.path.basename(file_path))[0]

    # Output files are expected to be named after the input file's stem.
    json_output = os.path.join(OUTPUT_DIR, f"{file_name}.json")
    overlay_html = os.path.join(OUTPUT_DIR, f"{file_name}_overlay.html")

    # Generate overlay (optional)
    cli.generate_docling_overlay(file_path, json_output, overlay_html)

    # Stitch final cleaned text (you can toggle GPT fixing)
    return cli.stitch_text_from_json(json_output, gpt_fix=False)
@app.get("/")
def root():
    """Answer ``GET /`` with a fixed JSON message."""
    payload = {"message": "Root is working"}
    return JSONResponse(content=payload)
@app.get("/health")
def health_check():
    """Answer ``GET /health`` with a fixed ``{"status": "ok"}`` JSON body."""
    status_body = {"status": "ok"}
    return JSONResponse(content=status_body)
def _safe_upload_name(raw_name) -> str:
    """Return the final path component of ``raw_name``, or ``""`` if unusable."""
    if not raw_name:
        return ""
    # Treat backslashes as separators too: docling_process_files() rewrites
    # them to "/", so a name like "..\\..\\x" would otherwise turn into a
    # traversal path after the file has been saved.
    candidate = os.path.basename(raw_name.replace("\\", "/"))
    return "" if candidate in {".", ".."} else candidate


@app.post("/parse")
async def parse_docling(file: UploadFile = File(...)):
    """Save an uploaded document, run it through docling and return its text.

    Args:
        file: The uploaded document (multipart form field ``file``).

    Returns:
        A JSON response ``{"text": ...}`` on success, or a 500 JSON response
        ``{"error": ...}`` if processing raises.

    Raises:
        HTTPException: 400 if no file was supplied or its filename is empty
            or not a usable file name.
    """
    if not file:
        raise HTTPException(status_code=400, detail="No file uploaded.")

    # The filename comes from the client and must not be trusted: keep only
    # its last path component so the upload cannot be written outside
    # UPLOAD_DIR via "../" segments or an absolute path.
    safe_name = _safe_upload_name(file.filename)
    if not safe_name:
        raise HTTPException(status_code=400, detail="Invalid or missing filename.")

    save_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(save_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # NOTE(review): docling_process_files() is a synchronous call made from
    # an async handler, so it runs on the event loop while it executes.
    try:
        text_output = docling_process_files([save_path])
        return JSONResponse(content={"text": text_output})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
|