File size: 2,578 Bytes
eb1ac6d
a2d9609
 
 
eb1ac6d
 
 
 
a2d9609
 
 
eb1ac6d
 
 
 
 
 
 
a2d9609
 
 
dc7c4f3
a2d9609
dc7c4f3
eb1ac6d
 
 
 
 
 
 
 
 
0f8ba6a
 
a2d9609
 
 
ea06666
a2d9609
0e04d08
2570fea
0bea06b
3b5d38c
 
 
 
6f9cae9
d1176bd
3b5d38c
 
2570fea
 
0bea06b
2570fea
 
 
 
 
 
 
 
 
 
a2d9609
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import easyocr
import gradio as gr
from PIL import Image
from llama_index.core import Settings
from llama_index.llms.gemini import Gemini
from llama_index.core import Document, VectorStoreIndex
from llama_index.embeddings.gemini import GeminiEmbedding

# OCR reader for English text; easyocr downloads its model weights on first use.
reader = easyocr.Reader(['en'])

# LLM and embedding model backed by the Gemini API.
# NOTE(review): assumes GEMINI_API_KEY is set in the environment —
# os.getenv returns None otherwise and the clients will fail at request time.
llm = Gemini(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/gemini-2.0-flash")
gemini_embedding_model = GeminiEmbedding(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/embedding-001")

# Register the models as llama_index global defaults so downstream
# index-building calls (see inference) pick them up without explicit wiring.
Settings.llm = llm
Settings.embed_model = gemini_embedding_model

def inference(img_path, width_ths, persist_dir="./receiptsembeddings"):
    """Run OCR on a receipt image and persist a vector index of the text.

    Args:
        img_path: Path to the receipt image file.
        width_ths: easyocr width threshold controlling how aggressively
            horizontally adjacent bounding boxes are merged into one line.
        persist_dir: Directory where the vector index is saved. Defaults
            to "./receiptsembeddings" for backward compatibility.

    Returns:
        The extracted text as a single newline-joined string.
    """
    # detail=0 makes readtext return plain strings (no boxes/confidences);
    # the remaining *_ths / add_margin values are tuned for receipt layouts
    # (slightly skewed, tightly packed rows).
    lines = reader.readtext(img_path, detail=0, slope_ths=0.7, ycenter_ths=0.9,
                            height_ths=0.8, width_ths=width_ths, add_margin=0.2)

    output = "\n".join(lines)

    # Wrap the extracted text in a Document and build a vector index from it
    # (uses the global Settings.llm / Settings.embed_model configured above).
    doc = Document(text=output)
    index = VectorStoreIndex.from_documents([doc])

    # Persist the index so a separate query step can reload it from disk.
    index.storage_context.persist(persist_dir=persist_dir)

    return output

title = "Receipt RAG"
description = "A simple Gradio interface to query receipts using RAG"
examples = [["data/receipt_00000.JPG", 7.7],
            ["data/receipt_00001.jpg", 7.7]]

# Gradio UI: receipt image + merge-threshold slider on the left,
# extracted OCR text on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {title}\n{description}")
    with gr.Row():
        with gr.Column():
            image = gr.Image(width=320, height=320, label="Input Receipt")
            width_ths = gr.Slider(0, 10, 7.7, 0.1, label="Width Threshold to merge bounding boxes")
            with gr.Row():
                clear_btn = gr.ClearButton(components=[image, width_ths])
                submit_btn = gr.Button("Submit", variant='primary')
        with gr.Column():
            ocr_out = gr.Textbox(label="OCR Output", type="text")

    submit_btn.click(inference, inputs=[image, width_ths], outputs=ocr_out)
    # BUG FIX: the reset values must be routed through `outputs=`. The old
    # call passed `inputs=[image, width_ths]` to a zero-argument lambda
    # (a TypeError on click) and, with no `outputs=`, discarded the returned
    # reset values, so the slider was never restored to its 7.7 default.
    # Also clear the OCR textbox when the user resets the form.
    clear_btn.click(lambda: [None, 7.7, ""], outputs=[image, width_ths, ocr_out])

    examples_obj = gr.Examples(examples=examples, inputs=[image, width_ths])

demo.launch()