# File size: 606 Bytes (ceaf2e8)
import os
import sys
sys.path.append('../')
from smoldocling import cli
from ipywidgets import HTML
import dotenv

# Driver script: passes one PNG image to cli.process_files (JSON output), then
# feeds the resulting JSON path to cli.generate_docling_overlay and
# cli.stitch_text_from_json.

# Despite the name, this is the INPUT image handed to the CLI helpers.
output_png = '../data/legislatures/AZ_h_1913_apr_special_p9.png'
# Directory that receives the JSON and the overlay HTML.
output_dir = '../output/'

cli.process_files([output_png], output_dir, output_format="json")

# Derive the base name with os.path so that only the trailing extension is
# removed; the earlier str.replace(".png", '') dropped every ".png" occurrence
# in the name and relied on "/" being the path separator.
# NOTE(review): assumes process_files writes "<image base name>.json" into
# output_dir -- confirm against the smoldocling CLI.
fileName = os.path.splitext(os.path.basename(output_png))[0]
json_output = os.path.join(output_dir, fileName + ".json")
overlay_html = os.path.join(output_dir, fileName + "_overlay.html")

html_output = cli.generate_docling_overlay(output_png, json_output, overlay_html)

# Load variables from a .env file into the process environment.
# NOTE(review): presumably only needed by the stitching step below -- confirm.
dotenv.load_dotenv()

cleaned_text = cli.stitch_text_from_json(json_output, gpt_fix=False)