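"""Receipt RAG: a Gradio app that OCRs a receipt with EasyOCR, indexes the
extracted text with LlamaIndex and Gemini embeddings, and answers questions
over it with a Gemini LLM."""
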
import os

import easyocr
import gradio as gr
from llama_index.core import (
    Document,
    Settings,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
# EasyOCR reader for English text
reader = easyocr.Reader(['en'])

# Gemini LLM and embedding model; both read the GEMINI_API_KEY environment variable
llm = Gemini(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/gemini-2.0-flash")
gemini_embedding_model = GeminiEmbedding(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/embedding-001")

# Set global settings so LlamaIndex uses these models by default
Settings.llm = llm
Settings.embed_model = gemini_embedding_model
def ocr_inference(img_path, width_ths):
    """OCR the receipt image, then build and persist a vector index over the text."""
    # The *_ths arguments control how aggressively EasyOCR merges adjacent
    # bounding boxes into a single line of text
    output = reader.readtext(img_path, detail=0, slope_ths=0.7, ycenter_ths=0.9,
                             height_ths=0.8, width_ths=width_ths, add_margin=0.2)
    output = "\n".join(output)
    # Embed the OCR text and persist the index to disk for the Q&A stage
    doc = Document(text=output)
    index = VectorStoreIndex.from_documents([doc])
    index.storage_context.persist(persist_dir="./receiptsembeddings")
    return output
def inference(question):
    """Answer a question against the persisted receipt index (the RAG step)."""
    persist_dir = "./receiptsembeddings"
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine()
    response = query_engine.query(question)
    return str(response)  # Response object; cast to plain text for the Textbox
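
# Example flow (hypothetical question, example path from the examples list below):
# ocr_inference("data/receipt_00000.JPG", 7.7) indexes the receipt, after which
# inference("What is the total amount?") answers from the persisted index.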
title = "Receipt RAG"
description = "A simple Gradio interface to query receipts using RAG"
examples = [["data/receipt_00000.JPG", 7.7],
["data/receipt_00001.jpg", 7.7]]
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {title}\n{description}")

    # OCR stage: upload a receipt, tune box merging, inspect the raw OCR text
    with gr.Row():
        with gr.Column():
            image = gr.Image(width=320, height=320, label="Input Receipt")
            width_ths = gr.Slider(minimum=0, maximum=10, value=7.7, step=0.1,
                                  label="Width Threshold to Merge Bounding Boxes")
            with gr.Row():
                clear_btn = gr.ClearButton(components=[image, width_ths])
                submit_btn = gr.Button("Submit", variant='primary')
        with gr.Column():
            ocr_out = gr.Textbox(label="OCR Output", type="text")
    submit_btn.click(ocr_inference, inputs=[image, width_ths], outputs=ocr_out)

    # Q&A stage: query the indexed receipt text
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Question", type="text")
            with gr.Row():
                chat_clear_btn = gr.ClearButton(components=[text])
                chat_submit_btn = gr.Button("Submit", variant='primary')
        with gr.Column():
            chat_out = gr.Textbox(label="Response", type="text")
    chat_submit_btn.click(inference, inputs=[text], outputs=[chat_out])

    examples_obj = gr.Examples(examples=examples, inputs=[image, width_ths])
demo.launch()
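
# Hypothetical local run (assumes this file is saved as app.py, GEMINI_API_KEY
# is set, and the example receipts exist under data/):
#   GEMINI_API_KEY=... python app.py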