import os
import easyocr
import gradio as gr
from PIL import Image
from llama_index.core import Settings
from llama_index.llms.gemini import Gemini
from llama_index.core import Document, VectorStoreIndex
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.core import load_index_from_storage, StorageContext

# OCR engine for English text; loaded once at startup (model download/load is slow).
reader = easyocr.Reader(['en'])

# Gemini LLM + embedding model, both keyed by the GEMINI_API_KEY env var.
# NOTE(review): os.getenv returns None when the variable is unset — presumably
# the client raises at first call; verify startup behavior without a key.
llm = Gemini(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/gemini-2.0-flash")
gemini_embedding_model = GeminiEmbedding(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/embedding-001")

# Set Global settings so VectorStoreIndex / query engines pick these up
# without passing them explicitly.
Settings.llm = llm
Settings.embed_model = gemini_embedding_model

def ocr_inference(img_path, width_ths):
    """Run OCR on a receipt image, index the extracted text, and return it.

    Args:
        img_path: Image supplied by the Gradio image component (path or
            array); may be None when the user submits without an image.
        width_ths: easyocr threshold controlling horizontal merging of
            adjacent bounding boxes into one text line.

    Returns:
        The OCR result as a single string, one detected text segment per line
        (empty string when no image was provided).

    Side effects:
        Persists a vector index of the text to ./receiptsembeddings,
        replacing the previously indexed receipt.
    """
    # Gradio passes None when the image input is cleared/empty; easyocr would
    # raise on it, so return early instead of crashing the UI.
    if img_path is None:
        return ""

    # detail=0 -> plain text strings instead of (bbox, text, confidence) tuples.
    segments = reader.readtext(img_path, detail=0, slope_ths=0.7, ycenter_ths=0.9,
                               height_ths=0.8, width_ths=width_ths, add_margin=0.2)

    text = "\n".join(segments)

    # Index the extracted text so the chat side can answer questions via RAG.
    doc = Document(text=text)
    index = VectorStoreIndex.from_documents([doc])
    index.storage_context.persist(persist_dir="./receiptsembeddings")

    return text

def inference(question):
    """Answer a question against the most recently indexed receipt.

    Args:
        question: Natural-language question about the receipt text.

    Returns:
        The answer as a plain string. (The query engine returns a Response
        object, not a str — converting ensures the Gradio Textbox renders
        the answer text instead of an object repr.)
    """
    persist_dir = "./receiptsembeddings"

    # Querying before any receipt has been OCR'd would crash with a
    # FileNotFoundError inside load_index_from_storage; fail gracefully.
    if not os.path.isdir(persist_dir):
        return "No receipt has been indexed yet. Please run the OCR step first."

    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine()
    response = query_engine.query(question)

    # Response is not a str; str() extracts the answer text for the UI.
    return str(response)

# UI copy and example inputs for the Gradio interface.
title = "Receipt RAG"
description = "A simple Gradio interface to query receipts using RAG"
# Each example pairs a sample receipt image with a width-threshold value
# matching the (image, width_ths) inputs of the OCR section below.
examples = [["data/receipt_00000.JPG", 7.7],
            ["data/receipt_00001.jpg", 7.7]]

# Two-section UI: (1) upload a receipt and run OCR + indexing,
# (2) chat with the indexed receipt via the RAG query engine.
# NOTE: component creation order and the Row/Column nesting define the
# layout, so statement order here is significant.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {title}\n{description}")
    # --- OCR section: image + threshold in, extracted text out ---
    with gr.Row():
        with gr.Column():
            image = gr.Image(width=320, height=320, label="Input Receipt")
            width_ths = gr.Slider(0, 10, 7.7, 0.1, label="Width Threshold to Merge Bounding Boxes")
            with gr.Row():
                clear_btn = gr.ClearButton(components=[image, width_ths])
                submit_btn = gr.Button("Submit", variant='primary')
        with gr.Column():
            ocr_out = gr.Textbox(label="OCR Output", type="text")

    # Submitting runs OCR and (as a side effect) persists the vector index.
    submit_btn.click(ocr_inference, inputs=[image, width_ths], outputs=ocr_out)

    # --- Chat section: question in, RAG answer out ---
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Question", type="text")
            with gr.Row():
                chat_clear_btn = gr.ClearButton(components=[text])
                chat_submit_btn = gr.Button("Submit", variant='primary')
        with gr.Column():
            chat_out = gr.Textbox(label="Response", type="text")
    
    chat_submit_btn.click(inference, inputs=[text], outputs=[chat_out])

    # Clicking an example fills the OCR inputs (image + threshold).
    examples_obj = gr.Examples(examples=examples, inputs=[image, width_ths])

demo.launch()